Reject non-unique PSU labels across strata with nest=False and fix test fixtures from PR #218 review (round 20)

igerber · claude · igerber · commit b6db0342c5f5 · 2026-03-21T11:06:30.000-04:00
Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
diff --git a/diff_diff/survey.py b/diff_diff/survey.py
@@ -156,19 +156,24 @@ def resolve(self, data: pd.DataFrame) -> "ResolvedSurveyDesign":
                 psu_arr = _factorize_cluster_ids(combined)
             else:
                 psu_arr = _factorize_cluster_ids(psu_raw)
+                # Validate PSU labels are globally unique when nest=False
+                # and strata are present. Repeated labels cause wrong n_psu,
+                # df_survey, and lonely_psu="adjust" global mean.
+                if strata_arr is not None:
+                    seen_psus: set = set()
+                    for h in np.unique(strata_arr):
+                        psu_in_h = set(psu_raw[strata_arr == h])
+                        overlap = seen_psus & psu_in_h
+                        if overlap:
+                            raise ValueError(
+                                f"PSU labels {overlap} appear in multiple strata. "
+                                "Set nest=True in SurveyDesign to make PSU IDs "
+                                "unique within strata, or use globally unique "
+                                "PSU labels."
+                            )
+                        seen_psus |= psu_in_h
 
-            # Count total PSUs: sum of unique PSUs within each stratum.
-            # When nest=True, labels are already globally unique so this
-            # is equivalent to len(np.unique(psu_arr)). When nest=False
-            # with strata, PSU labels may repeat across strata (common in
-            # survey data), so we count per-stratum to get the correct total.
-            if strata_arr is not None and not self.nest:
-                n_psu = sum(
-                    len(np.unique(psu_arr[strata_arr == h]))
-                    for h in np.unique(strata_arr)
-                )
-            else:
-                n_psu = len(np.unique(psu_arr))
+            n_psu = len(np.unique(psu_arr))
 
         # --- FPC ---
         fpc_arr = None
diff --git a/tests/test_survey.py b/tests/test_survey.py
@@ -118,7 +118,8 @@ def multiperiod_data():
     for unit in range(n_units):
         is_treated = unit < n_treated
         stratum = unit // 20  # 3 strata (20 units each)
-        psu = unit // 3  # 20 PSUs globally unique
+        psu_within = (unit % 20) // 5  # 4 PSUs within each stratum
+        psu = stratum * 4 + psu_within  # globally unique PSU ID
         wt = 1.0 + 0.4 * stratum
 
         for t in periods:
@@ -871,21 +872,16 @@ def test_nest_true(self):
             }
         )
 
-        # Without nest: PSU labels repeat but n_psu counts per-stratum
+        # nest=False rejects repeated PSU labels across strata
         sd_no_nest = SurveyDesign(weights="w", strata="s", psu="psu", nest=False)
-        resolved_no_nest = sd_no_nest.resolve(df)
+        with pytest.raises(ValueError, match="PSU labels.*multiple strata"):
+            sd_no_nest.resolve(df)
 
-        # With nest: PSU 0 in stratum 0 != PSU 0 in stratum 1
+        # nest=True makes them unique: PSU 0 in stratum 0 != PSU 0 in stratum 1
         sd_nest = SurveyDesign(weights="w", strata="s", psu="psu", nest=True)
         resolved_nest = sd_nest.resolve(df)
-
-        # Both should produce 20 PSUs (10 per stratum × 2 strata)
-        # nest=True makes globally unique codes; nest=False counts per-stratum
-        assert resolved_nest.n_psu == 20
-        assert resolved_no_nest.n_psu == 20
-        # df_survey should match: 20 - 2 = 18
-        assert resolved_nest.df_survey == 18
-        assert resolved_no_nest.df_survey == 18
+        assert resolved_nest.n_psu == 20  # 10 per stratum × 2 strata
+        assert resolved_nest.df_survey == 18  # 20 - 2
 
     def test_twfe_with_survey_design(self, twfe_panel_data):
         """TwoWayFixedEffects accepts and uses survey_design."""
@@ -3058,14 +3054,13 @@ def test_did_with_fpc_only_survey(self):
 
 
 class TestRound19Fixes:
-    """Tests for PR #218 review round 19: per-stratum PSU counting."""
+    """Tests for PR #218 review round 19: PSU nesting validation."""
 
-    def test_npsu_counts_per_stratum_with_repeated_labels(self):
-        """n_psu counts unique PSUs per stratum, not globally, when labels repeat."""
+    def test_repeated_psu_labels_nest_false_rejected(self):
+        """Repeated PSU labels across strata with nest=False are rejected."""
         n = 40
         strata = np.repeat([0, 1], 20)
-        # PSU IDs 0..9 repeat across both strata
-        psu_raw = np.tile(np.arange(10), 4)[:n]
+        psu_raw = np.tile(np.arange(10), 4)[:n]  # labels repeat
 
         df = pd.DataFrame(
             {
@@ -3075,15 +3070,43 @@ def test_npsu_counts_per_stratum_with_repeated_labels(self):
                 "psu": psu_raw,
             }
         )
-
-        # nest=False with repeated labels: should count 10+10=20 PSUs
         sd = SurveyDesign(weights="w", strata="s", psu="psu", nest=False)
+        with pytest.raises(ValueError, match="PSU labels.*multiple strata"):
+            sd.resolve(df)
+
+    def test_repeated_psu_labels_nest_true_accepted(self):
+        """Repeated PSU labels with nest=True produce correct n_psu."""
+        n = 40
+        strata = np.repeat([0, 1], 20)
+        psu_raw = np.tile(np.arange(10), 4)[:n]
+
+        df = pd.DataFrame(
+            {
+                "y": np.ones(n),
+                "w": np.ones(n),
+                "s": strata,
+                "psu": psu_raw,
+            }
+        )
+        sd = SurveyDesign(weights="w", strata="s", psu="psu", nest=True)
         resolved = sd.resolve(df)
-        assert resolved.n_psu == 20  # 10 per stratum × 2 strata
+        assert resolved.n_psu == 20  # 10 per stratum × 2
         assert resolved.df_survey == 18  # 20 - 2
 
-        # nest=True should give same result
-        sd_nest = SurveyDesign(weights="w", strata="s", psu="psu", nest=True)
-        resolved_nest = sd_nest.resolve(df)
-        assert resolved_nest.n_psu == 20
-        assert resolved_nest.df_survey == 18
+    def test_unique_psu_labels_nest_false_accepted(self):
+        """Globally unique PSU labels with nest=False work correctly."""
+        n = 40
+        strata = np.repeat([0, 1], 20)
+        psu_raw = np.arange(n) // 2  # 20 unique PSUs, no overlap
+
+        df = pd.DataFrame(
+            {
+                "y": np.ones(n),
+                "w": np.ones(n),
+                "s": strata,
+                "psu": psu_raw,
+            }
+        )
+        sd = SurveyDesign(weights="w", strata="s", psu="psu", nest=False)
+        resolved = sd.resolve(df)
+        assert resolved.n_psu == 20