Fix CI review R8: extend zero-weight contract to all validators + survey branch tests

igerber · claude · igerber · commit de8ff5e83290 · 2026-04-17T06:04:25.000-04:00
- P1 #1: The R5 zero-weight filter only ran inside the cell aggregation step, after the NaN/coercion checks for group/time/treatment/outcome. Moved the filter to the very top of _validate_and_aggregate_to_cells so validation only sees the effective sample. fit()'s controls, trends_nonparam, and heterogeneity blocks now also scope their NaN/time-invariance checks to positive-weight rows when survey_weights is active. Legitimate SurveyDesign.subpopulation() inputs with NaN in excluded rows now fit cleanly. The TSL variance path is unchanged (zero-weight obs still contribute zero psi).
- P2: 5 new regression tests in test_survey_dcdh.py — TestZeroWeightSubpopulation now covers NaN outcome and NaN het columns in excluded rows; new TestSurveyTrendsLinear / TestSurveyTrendsNonparam / TestSurveyDesign2 classes exercise survey_design combined with three previously-untested branches. All 262 targeted tests pass.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
diff --git a/diff_diff/chaisemartin_dhaultfoeuille.py b/diff_diff/chaisemartin_dhaultfoeuille.py
@@ -157,6 +157,16 @@ def _validate_and_aggregate_to_cells(
 
     df = data.copy()
 
+    # 1a. SurveyDesign.subpopulation() contract: zero-weight rows are
+    # out-of-sample. Pre-filter them *before* any NaN/coercion validation
+    # so that invalid values in excluded rows do not abort the fit.
+    if weights is not None:
+        weights_arr = np.asarray(weights, dtype=np.float64)
+        pos_mask = weights_arr > 0
+        if not pos_mask.all():
+            df = df.loc[pos_mask].reset_index(drop=True)
+            weights = weights_arr[pos_mask]
+
     # 1b. Group and time NaN checks (before groupby, which silently drops NaN keys)
     n_nan_group = int(df[group].isna().sum())
     if n_nan_group > 0:
@@ -210,19 +220,8 @@ def _validate_and_aggregate_to_cells(
 
     # 5. Cell aggregation (compute min/max for within-cell check)
     if weights is not None:
-        # Zero-weight rows are out-of-sample (e.g., via
-        # SurveyDesign.subpopulation()). Pre-filter them before the
-        # groupby so that d_min/d_max/n_gt reflect the effective sample
-        # and a zero-weight obs cannot trip the within-cell treatment-
-        # constancy check or inflate downstream n_gt counts.
-        weights_arr = np.asarray(weights, dtype=np.float64)
-        pos_mask = weights_arr > 0
-        if not pos_mask.all():
-            df = df.loc[pos_mask].reset_index(drop=True)
-            weights_arr = weights_arr[pos_mask]
-        weights = weights_arr
-
-        # Survey-weighted cell aggregation.
+        # Survey-weighted cell aggregation (zero-weight rows already
+        # filtered upstream at step 1a).
         # y_gt = sum(w_i * y_i) / sum(w_i) within each (g, t) cell.
         # Treatment is constant within cells (checked below), so weighted
         # and unweighted means are identical for d_gt.
@@ -730,8 +729,17 @@ def fit(
                     f"Control column(s) {missing_controls!r} not found in "
                     f"data. Available columns: {list(data.columns)}"
                 )
-            # Work on a copy to avoid mutating the caller's DataFrame
-            data_controls = data[controls].copy()
+            # SurveyDesign.subpopulation() contract: zero-weight rows are
+            # out-of-sample. Scope NaN/Inf validation to positive-weight
+            # rows so that excluded obs with missing covariates do not
+            # abort the fit. The downstream weighted aggregation
+            # (sum(w*x)/sum(w)) handles zero-weight rows correctly on
+            # its own.
+            if survey_weights is not None:
+                pos_mask_ctrl = np.asarray(survey_weights) > 0
+                data_controls = data.loc[pos_mask_ctrl, controls].copy()
+            else:
+                data_controls = data[controls].copy()
             for c in controls:
                 try:
                     data_controls[c] = pd.to_numeric(data_controls[c])
@@ -1196,16 +1204,24 @@ def fit(
                     f"trends_nonparam column {set_col!r} not found in "
                     f"data. Available columns: {list(data.columns)}"
                 )
-            # Reject NaN/missing set assignments
-            n_na_set = int(data[set_col].isna().sum())
+            # SurveyDesign.subpopulation() contract: scope NaN and
+            # time-invariance validation to positive-weight rows so
+            # excluded obs with missing set IDs do not abort the fit.
+            if survey_weights is not None:
+                pos_mask_tnp = np.asarray(survey_weights) > 0
+                data_tnp = data.loc[pos_mask_tnp]
+            else:
+                data_tnp = data
+            # Reject NaN/missing set assignments (effective sample only)
+            n_na_set = int(data_tnp[set_col].isna().sum())
             if n_na_set > 0:
                 raise ValueError(
                     f"trends_nonparam column {set_col!r} contains "
                     f"{n_na_set} NaN/missing value(s). All groups must "
                     f"have a valid set assignment."
                 )
             # Aggregate set membership per group (must be time-invariant)
-            set_per_group = data.groupby(group)[set_col].nunique()
+            set_per_group = data_tnp.groupby(group)[set_col].nunique()
             time_varying = set_per_group[set_per_group > 1]
             if len(time_varying) > 0:
                 raise ValueError(
@@ -1217,7 +1233,7 @@ def fit(
             # Set partition must be coarser than group (multiple groups
             # per set). A group-level partition creates singleton sets
             # with no within-set controls available.
-            set_map_check = data.groupby(group)[set_col].first()
+            set_map_check = data_tnp.groupby(group)[set_col].first()
             n_sets = set_map_check.nunique()
             n_groups_total = len(set_map_check)
             if n_sets >= n_groups_total:
@@ -1229,7 +1245,7 @@ def fit(
                     f"within-set controls."
                 )
             # Extract set membership per group aligned with all_groups
-            set_map = data.groupby(group)[set_col].first()
+            set_map = data_tnp.groupby(group)[set_col].first()
             set_ids_arr = np.array(
                 [set_map.loc[g] for g in all_groups], dtype=object
             )
@@ -2376,16 +2392,24 @@ def fit(
                     "control-pool restrictions; the results would be "
                     "inconsistent with the fitted estimator."
                 )
-            # Extract per-group covariate (must be time-invariant)
-            het_per_group = data.groupby(group)[het_col].nunique()
+            # Extract per-group covariate (must be time-invariant).
+            # SurveyDesign.subpopulation() contract: scope time-invariance
+            # check to positive-weight rows so excluded obs with NaN/varying
+            # het values do not abort the fit.
+            if survey_weights is not None:
+                pos_mask_het = np.asarray(survey_weights) > 0
+                data_het = data.loc[pos_mask_het]
+            else:
+                data_het = data
+            het_per_group = data_het.groupby(group)[het_col].nunique()
             het_varying = het_per_group[het_per_group > 1]
             if len(het_varying) > 0:
                 raise ValueError(
                     f"heterogeneity column {het_col!r} must be "
                     f"time-invariant within each group. "
                     f"{len(het_varying)} group(s) have varying values."
                 )
-            het_map = data.groupby(group)[het_col].first()
+            het_map = data_het.groupby(group)[het_col].first()
             X_het = np.array(
                 [float(het_map.loc[g]) for g in all_groups]
             )
diff --git a/tests/test_survey_dcdh.py b/tests/test_survey_dcdh.py
@@ -841,3 +841,150 @@ def test_mixed_zero_weight_row_excluded_from_validation(self, base_data):
             survey_design=sd,
         )
         assert np.isfinite(result.overall_att)
+
+    def test_zero_weight_row_with_nan_outcome(self, base_data):
+        """A zero-weight row with NaN outcome must not trip the outcome
+        NaN validator. SurveyDesign.subpopulation() contract."""
+        df_ = base_data.copy()
+        df_["pw"] = 1.0
+        sample = df_.iloc[0].copy()
+        sample["outcome"] = np.nan
+        sample["pw"] = 0.0
+        df_ = pd.concat([df_, pd.DataFrame([sample])], ignore_index=True)
+        sd = SurveyDesign(weights="pw")
+        # Must succeed — zero-weight row with NaN outcome is out-of-sample
+        result = ChaisemartinDHaultfoeuille(seed=1).fit(
+            df_,
+            outcome="outcome", group="group",
+            time="period", treatment="treatment",
+            survey_design=sd,
+        )
+        assert np.isfinite(result.overall_att)
+
+    def test_zero_weight_row_with_nan_heterogeneity(self, base_data):
+        """A zero-weight row with NaN in the heterogeneity column must
+        not trip the heterogeneity time-invariance validator."""
+        rng = np.random.default_rng(0)
+        df_ = base_data.copy()
+        df_["pw"] = 1.0
+        groups = sorted(df_["group"].unique())
+        het_map = {g: rng.uniform(-1, 1) for g in groups}
+        df_["x_het"] = df_["group"].map(het_map)
+        # Inject a zero-weight row with NaN het value for an existing group
+        sample = df_.iloc[0].copy()
+        sample["x_het"] = np.nan
+        sample["pw"] = 0.0
+        df_ = pd.concat([df_, pd.DataFrame([sample])], ignore_index=True)
+        sd = SurveyDesign(weights="pw")
+        # Must succeed — zero-weight row with NaN het is out-of-sample
+        result = ChaisemartinDHaultfoeuille(seed=1).fit(
+            df_,
+            outcome="outcome", group="group",
+            time="period", treatment="treatment",
+            L_max=1, heterogeneity="x_het", survey_design=sd,
+        )
+        assert result.heterogeneity_effects is not None
+
+
+# ── Test: Survey + trends_linear ────────────────────────────────────
+
+
+class TestSurveyTrendsLinear:
+    """Survey-backed trends_linear fit must populate linear_trends_effects."""
+
+    def test_survey_trends_linear_runs(self, data_with_survey):
+        sd = SurveyDesign(weights="pw")
+        r = ChaisemartinDHaultfoeuille(seed=1).fit(
+            data_with_survey,
+            outcome="outcome", group="group",
+            time="period", treatment="treatment",
+            L_max=2, trends_linear=True, survey_design=sd,
+        )
+        assert r.survey_metadata is not None
+        # linear_trends_effects populated per REGISTRY line 614 contract
+        assert r.linear_trends_effects is not None
+        # At least one horizon should be estimable with finite value
+        finite_horizons = [
+            h for h, entry in r.linear_trends_effects.items()
+            if np.isfinite(entry.get("effect", np.nan))
+        ]
+        assert len(finite_horizons) > 0, (
+            "expected at least one horizon with finite linear_trends_effect"
+        )
+
+
+# ── Test: Survey + trends_nonparam ──────────────────────────────────
+
+
+class TestSurveyTrendsNonparam:
+    """Survey-backed trends_nonparam fit must thread set-restrictions."""
+
+    def test_survey_trends_nonparam_runs(self, data_with_survey):
+        # Reuse stratum as set ID (time-invariant per group)
+        sd = SurveyDesign(weights="pw")
+        r = ChaisemartinDHaultfoeuille(seed=1).fit(
+            data_with_survey,
+            outcome="outcome", group="group",
+            time="period", treatment="treatment",
+            L_max=2, trends_nonparam="stratum", survey_design=sd,
+        )
+        assert r.survey_metadata is not None
+        assert r.event_study_effects is not None
+        # Support trimming may reduce counts but at least one finite-SE
+        # horizon should remain on this fixture.
+        finite_ses = [
+            entry
+            for entry in r.event_study_effects.values()
+            if np.isfinite(entry.get("se", np.nan))
+        ]
+        assert len(finite_ses) > 0, (
+            "expected at least one event-study horizon with finite SE "
+            "under trends_nonparam + survey"
+        )
+
+
+# ── Test: Survey + design2 ──────────────────────────────────────────
+
+
+class TestSurveyDesign2:
+    """Survey-backed design2 fit must populate design2_effects."""
+
+    @staticmethod
+    def _make_join_then_leave_panel(seed=42, n_groups=30, n_periods=8):
+        """Panel with join-then-leave (Design-2) groups, matching the
+        existing design2 fixture in test_chaisemartin_dhaultfoeuille.py."""
+        rng = np.random.RandomState(seed)
+        rows = []
+        for g in range(n_groups):
+            group_fe = rng.normal(0, 2)
+            for t in range(n_periods):
+                if g < 10:
+                    d = 1 if 2 <= t < 5 else 0
+                elif g < 20:
+                    d = 1 if t >= 3 else 0
+                else:
+                    d = 0
+                y = group_fe + 2.0 * t + 5.0 * d + rng.normal(0, 0.3)
+                rows.append(
+                    {"group": g, "period": t, "treatment": d, "outcome": y, "pw": 1.0}
+                )
+        return pd.DataFrame(rows)
+
+    def test_survey_design2_runs(self):
+        df_ = self._make_join_then_leave_panel()
+        sd = SurveyDesign(weights="pw")
+        # drop_larger_lower=False keeps the 2-switch groups
+        r = ChaisemartinDHaultfoeuille(
+            seed=1, drop_larger_lower=False
+        ).fit(
+            df_,
+            outcome="outcome", group="group",
+            time="period", treatment="treatment",
+            L_max=1, design2=True, survey_design=sd,
+        )
+        assert r.survey_metadata is not None
+        assert r.design2_effects is not None
+        assert r.design2_effects["n_design2_groups"] == 10
+        # switch_in and switch_out mean effects should be finite
+        assert np.isfinite(r.design2_effects["switch_in"]["mean_effect"])
+        assert np.isfinite(r.design2_effects["switch_out"]["mean_effect"])