Address second AI review round on survey tutorial

igerber · claude · igerber · commit 2dc1888b028c · 2026-04-01T08:33:39.000-04:00
- Fix StackedDiD support table: show "Full (pweight only)" for
  strata/PSU/FPC since it supports full TSL on composed weights,
  just restricted to pweight weight type (P1)
- Fix replicate weight section: remove misleading TSL vs JK1
  equivalence claim; show JK1 as standalone API demo and note
  that stratified replicates (JKn) are needed for stratified
  designs (P1)
- Add input validation for weight_variation and cohort_periods
  in generate_survey_did_data() with negative tests (P2)

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
diff --git a/diff_diff/prep_dgp.py b/diff_diff/prep_dgp.py
@@ -1220,6 +1220,14 @@ def generate_survey_did_data(
 
     if cohort_periods is None:
         cohort_periods = [3, 5]
+    if not cohort_periods:
+        raise ValueError("cohort_periods must be a non-empty list of integers")
+
+    valid_wv = ("none", "moderate", "high")
+    if weight_variation not in valid_wv:
+        raise ValueError(
+            f"weight_variation must be one of {valid_wv}, got {weight_variation!r}"
+        )
 
     # --- Survey structure: assign units to strata and PSUs ---
     n_psu_total = n_strata * psu_per_stratum
diff --git a/docs/tutorials/16_survey_did.ipynb b/docs/tutorials/16_survey_did.ipynb
@@ -486,21 +486,15 @@
     "    first_treat='first_treat', survey_design=sd_rep,\n",
     ")\n",
     "\n",
-    "# Compare TSL vs replicate SE\n",
-    "# (Use the same data with strata/PSU/FPC for TSL)\n",
-    "sd_tsl = SurveyDesign(weights='weight', strata='stratum', psu='psu', fpc='fpc')\n",
-    "cs_tsl = CallawaySantAnna(control_group='never_treated')\n",
-    "results_tsl = cs_tsl.fit(\n",
-    "    df_rep, outcome='outcome', unit='unit', time='period',\n",
-    "    first_treat='first_treat', survey_design=sd_tsl,\n",
-    ")\n",
-    "\n",
-    "print(f\"{'Method':20s} {'ATT':>10s} {'SE':>10s}\")\n",
-    "print(\"-\" * 42)\n",
-    "print(f\"{'TSL (strata/PSU/FPC)':20s} {results_tsl.overall_att:>10.4f} {results_tsl.overall_se:>10.4f}\")\n",
-    "print(f\"{'JK1 (replicate wts)':20s} {results_rep.overall_att:>10.4f} {results_rep.overall_se:>10.4f}\")\n",
-    "print(f\"\\nBoth methods correctly account for the survey design.\")\n",
-    "print(f\"In practice, use whichever your survey provides.\")"
+    "print(f\"Overall ATT: {results_rep.overall_att:.4f} (SE: {results_rep.overall_se:.4f})\")\n",
+    "print(f\"Survey d.f.: {results_rep.survey_metadata.df_survey}\")\n",
+    "print(f\"Replicate method: {results_rep.survey_metadata.replicate_method}\")\n",
+    "print(f\"Number of replicates: {results_rep.survey_metadata.n_replicates}\")\n",
+    "print()\n",
+    "print(\"Note: JK1 replicates are unstratified (global delete-one-PSU).\")\n",
+    "print(\"If your survey uses stratified sampling, stratified replicates (JKn)\")\n",
+    "print(\"provide design-consistent variance estimation. Use whichever method\")\n",
+    "print(\"your survey documentation specifies.\")"
    ]
   },
   {
@@ -697,7 +691,7 @@
     "| **CallawaySantAnna** | Full | Full | Full | Multiplier at PSU |\n",
     "| **TripleDifference** | Full | Full | Full (analytical) | -- |\n",
     "| **SunAbraham** | Full | Full | -- | Rao-Wu rescaled |\n",
-    "| **StackedDiD** | pweight only | pweight only | -- | -- |\n",
+    "| **StackedDiD** | pweight only | Full (pweight only) | -- | -- |\n",
     "| **ImputationDiD** | Full | Partial (no FPC) | -- | Multiplier at PSU |\n",
     "| **TwoStageDiD** | Full | Partial (no FPC) | -- | Multiplier at PSU |\n",
     "| **ContinuousDiD** | Full | Full | Full (analytical) | Multiplier at PSU |\n",
@@ -707,9 +701,10 @@
     "| **BaconDecomposition** | Diagnostic | Diagnostic | -- | -- |\n",
     "\n",
     "**Legend:**\n",
-    "- **Full**: Weights + strata/PSU/FPC + Taylor Series Linearization variance\n",
+    "- **Full**: Weights (all types) + strata/PSU/FPC + Taylor Series Linearization variance\n",
+    "- **Full (pweight only)**: Full TSL support with strata/PSU/FPC, but only accepts `pweight` weight type (`fweight`/`aweight` rejected because Q-weight composition changes their semantics)\n",
     "- **Partial (no FPC)**: Weights + strata (for df) + PSU (for clustering); FPC raises `NotImplementedError`\n",
-    "- **pweight only**: Sampling weights for point estimates; no strata/PSU/FPC\n",
+    "- **pweight only**: Sampling weights for point estimates; no strata/PSU/FPC design elements\n",
     "- **Diagnostic**: Weighted descriptive statistics only (no inference)\n",
     "- **--**: Not supported\n",
     "\n",
diff --git a/tests/test_prep.py b/tests/test_prep.py
@@ -1322,3 +1322,19 @@ def test_repeated_cross_section(self):
         assert data["unit"].nunique() == 60  # unique across all periods
         # No unit appears in more than one period
         assert data.groupby("unit")["period"].nunique().max() == 1
+
+    def test_invalid_weight_variation(self):
+        """Test that invalid weight_variation raises ValueError."""
+        import pytest
+        from diff_diff.prep import generate_survey_did_data
+
+        with pytest.raises(ValueError, match="weight_variation must be"):
+            generate_survey_did_data(weight_variation="invalid", seed=42)
+
+    def test_empty_cohort_periods(self):
+        """Test that empty cohort_periods raises ValueError."""
+        import pytest
+        from diff_diff.prep import generate_survey_did_data
+
+        with pytest.raises(ValueError, match="cohort_periods must be"):
+            generate_survey_did_data(cohort_periods=[], seed=42)