Fix NaN t-statistics across 7 locations for consistent undefined inference

igerber · claude · igerber · commit 1b97f7870047 · 2026-01-31T15:41:10.000-05:00
Replace `else 0.0` with `else np.nan` when SE is non-finite or zero in t-stat calculations across sun_abraham.py, triple_diff.py, and diagnostics.py. Add CI guards returning (NaN, NaN) for 4 downstream confidence interval computations. Matches the CallawaySantAnna pattern established in PR #97. Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
diff --git a/diff_diff/diagnostics.py b/diff_diff/diagnostics.py
@@ -662,7 +662,7 @@ def permutation_test(
     ci_upper = np.percentile(valid_effects, (1 - alpha / 2) * 100)
 
     # T-stat from original estimate
-    t_stat = original_att / se if se > 0 else 0.0
+    t_stat = original_att / se if np.isfinite(se) and se > 0 else np.nan
 
     return PlaceboTestResults(
         test_type="permutation",
@@ -783,14 +783,14 @@ def leave_one_out_test(
     # Statistics of LOO distribution
     mean_effect = np.mean(valid_effects)
     se = np.std(valid_effects, ddof=1) if len(valid_effects) > 1 else 0.0
-    t_stat = mean_effect / se if se > 0 else 0.0
+    t_stat = mean_effect / se if np.isfinite(se) and se > 0 else np.nan
 
     # Use t-distribution for p-value
     df = len(valid_effects) - 1 if len(valid_effects) > 1 else 1
     p_value = compute_p_value(t_stat, df=df)
 
     # CI
-    conf_int = compute_confidence_interval(mean_effect, se, alpha, df=df)
+    conf_int = compute_confidence_interval(mean_effect, se, alpha, df=df) if np.isfinite(se) and se > 0 else (np.nan, np.nan)
 
     return PlaceboTestResults(
         test_type="leave_one_out",
diff --git a/diff_diff/sun_abraham.py b/diff_diff/sun_abraham.py
@@ -600,9 +600,9 @@ def fit(
             coef_index_map,
         )
 
-        overall_t = overall_att / overall_se if overall_se > 0 else 0.0
+        overall_t = overall_att / overall_se if np.isfinite(overall_se) and overall_se > 0 else np.nan
         overall_p = compute_p_value(overall_t)
-        overall_ci = compute_confidence_interval(overall_att, overall_se, self.alpha)
+        overall_ci = compute_confidence_interval(overall_att, overall_se, self.alpha) if np.isfinite(overall_se) and overall_se > 0 else (np.nan, np.nan)
 
         # Run bootstrap if requested
         bootstrap_results = None
@@ -623,7 +623,7 @@ def fit(
 
             # Update results with bootstrap inference
             overall_se = bootstrap_results.overall_att_se
-            overall_t = overall_att / overall_se if overall_se > 0 else 0.0
+            overall_t = overall_att / overall_se if np.isfinite(overall_se) and overall_se > 0 else np.nan
             overall_p = bootstrap_results.overall_att_p_value
             overall_ci = bootstrap_results.overall_att_ci
 
@@ -640,7 +640,7 @@ def fit(
                     eff_val = event_study_effects[e]["effect"]
                     se_val = event_study_effects[e]["se"]
                     event_study_effects[e]["t_stat"] = (
-                        eff_val / se_val if se_val > 0 else 0.0
+                        eff_val / se_val if np.isfinite(se_val) and se_val > 0 else np.nan
                     )
 
         # Convert cohort effects to storage format
@@ -878,9 +878,9 @@ def _compute_iw_effects(
             agg_var = float(weight_vec @ vcov_subset @ weight_vec)
             agg_se = np.sqrt(max(agg_var, 0))
 
-            t_stat = agg_effect / agg_se if agg_se > 0 else 0.0
+            t_stat = agg_effect / agg_se if np.isfinite(agg_se) and agg_se > 0 else np.nan
             p_val = compute_p_value(t_stat)
-            ci = compute_confidence_interval(agg_effect, agg_se, self.alpha)
+            ci = compute_confidence_interval(agg_effect, agg_se, self.alpha) if np.isfinite(agg_se) and agg_se > 0 else (np.nan, np.nan)
 
             event_study_effects[e] = {
                 "effect": agg_effect,
diff --git a/diff_diff/triple_diff.py b/diff_diff/triple_diff.py
@@ -598,14 +598,14 @@ def fit(
             )
 
         # Compute inference
-        t_stat = att / se if se > 0 else 0.0
+        t_stat = att / se if np.isfinite(se) and se > 0 else np.nan
         df = n_obs - 8  # Approximate df (8 cell means)
         if covariates:
             df -= len(covariates)
         df = max(df, 1)
 
         p_value = compute_p_value(t_stat, df=df)
-        conf_int = compute_confidence_interval(att, se, self.alpha, df=df)
+        conf_int = compute_confidence_interval(att, se, self.alpha, df=df) if np.isfinite(se) and se > 0 else (np.nan, np.nan)
 
         # Get number of clusters if clustering
         n_clusters = None
diff --git a/docs/methodology/REGISTRY.md b/docs/methodology/REGISTRY.md
@@ -16,6 +16,7 @@ This document provides the academic foundations and key implementation requireme
    - [TripleDifference](#tripledifference)
    - [TROP](#trop)
 4. [Diagnostics & Sensitivity](#diagnostics--sensitivity)
+   - [PlaceboTests](#placebotests)
    - [BaconDecomposition](#bacondecomposition)
    - [HonestDiD](#honestdid)
    - [PreTrendsPower](#pretrendspower)
@@ -319,6 +320,11 @@ where weights ŵ_{g,e} = n_{g,e} / Σ_g n_{g,e} (sample share of cohort g at eve
   - Detection: Pivoted QR decomposition with tolerance `1e-07` (R's `qr()` default)
   - Handling: Warns and drops linearly dependent columns, sets NA for dropped coefficients (R-style, matches `lm()`)
   - Parameter: `rank_deficient_action` controls behavior: "warn" (default), "error", or "silent"
+- NaN inference for undefined statistics:
+  - t_stat: Uses NaN (not 0.0) when SE is non-finite or zero
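+  - p_value and CI: Also NaN when t_stat is NaN under analytical inference; when bootstrap is used, the overall p_value and CI are taken from the bootstrap results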
+  - Applies to overall ATT, per-effect event study, and aggregated event study
+  - **Note**: Defensive enhancement matching CallawaySantAnna behavior; R's `fixest::sunab()` may produce Inf/NaN without warning
 
 **Reference implementation(s):**
 - R: `fixest::sunab()` (Laurent Bergé's implementation)
@@ -429,6 +435,10 @@ Doubly robust estimator:
 - Propensity scores near 0/1: trimmed at `pscore_trim` (default 0.01)
 - Empty cells: raises ValueError with diagnostic message
 - Collinear covariates: automatic detection and warning
+- NaN inference for undefined statistics:
+  - t_stat: Uses NaN (not 0.0) when SE is non-finite or zero
+  - p_value and CI: Also NaN when t_stat is NaN
+  - **Note**: Defensive enhancement; reference implementation behavior not yet documented
 
 **Reference implementation(s):**
 - Authors' replication code (forthcoming)
@@ -656,6 +666,18 @@ For joint method, LOOCV works as follows:
 
 # Diagnostics & Sensitivity
 
+## PlaceboTests
+
+**Module:** `diff_diff/diagnostics.py`
+
+*Edge cases:*
+- NaN inference for undefined statistics:
+  - `permutation_test`: t_stat is NaN when the permutation SE is zero or non-finite (a zero SE arises when all permutations produce identical estimates)
+  - `leave_one_out_test`: t_stat, p_value, CI are NaN when the LOO SE is zero or non-finite (all LOO effects identical, or fewer than two valid LOO effects, in which case SE is set to 0.0)
+  - **Note**: Defensive enhancement matching CallawaySantAnna NaN convention
+
+---
+
 ## BaconDecomposition
 
 **Primary source:** [Goodman-Bacon, A. (2021). Difference-in-differences with variation in treatment timing. *Journal of Econometrics*, 225(2), 254-277.](https://doi.org/10.1016/j.jeconom.2021.03.014)
diff --git a/tests/test_diagnostics.py b/tests/test_diagnostics.py
@@ -672,3 +672,128 @@ def test_returns_dict_structure(self, simple_panel_data):
         # Check that each result is either PlaceboTestResults or error dict
         for key, value in results.items():
             assert isinstance(value, (PlaceboTestResults, dict))
+
+
+class TestDiagnosticsTStatNaN:
+    """Tests for NaN t_stat when SE is invalid in diagnostic functions."""
+
+    def test_permutation_test_tstat_nan_when_se_zero(self):
+        """permutation_test t_stat is NaN when SE is zero (all permutations identical)."""
+        np.random.seed(42)
+
+        # Create data where all units have deterministic outcomes
+        # so permutation distribution has zero variance
+        n_units = 20
+        data = []
+        for unit in range(n_units):
+            is_treated = unit < n_units // 2
+            for post in [0, 1]:
+                y = 5.0
+                if is_treated and post == 1:
+                    y += 2.0
+                data.append({
+                    "unit": unit,
+                    "post": post,
+                    "outcome": y,
+                    "treated": int(is_treated),
+                })
+
+        df = pd.DataFrame(data)
+
+        import warnings
+        with warnings.catch_warnings(record=True):
+            warnings.simplefilter("always")
+            result = permutation_test(
+                df,
+                outcome="outcome",
+                treatment="treated",
+                time="post",
+                unit="unit",
+                n_permutations=20,
+                seed=42,
+            )
+
+        se = result.se
+        t_stat = result.t_stat
+
+        if not np.isfinite(se) or se == 0:
+            assert np.isnan(t_stat), (
+                f"permutation t_stat should be NaN when SE={se}, got {t_stat}"
+            )
+        else:
+            expected = result.original_effect / se
+            assert np.isclose(t_stat, expected), (
+                f"permutation t_stat should be effect/SE, "
+                f"expected {expected}, got {t_stat}"
+            )
+
+    def test_leave_one_out_tstat_nan_when_se_zero(self):
+        """leave_one_out_test t_stat and CI are NaN when SE is zero."""
+        np.random.seed(42)
+
+        # Create data where leaving out any unit gives identical results
+        # (deterministic outcomes, no noise)
+        n_units = 20
+        data = []
+        for unit in range(n_units):
+            is_treated = unit < n_units // 2
+            for post in [0, 1]:
+                y = 5.0
+                if is_treated and post == 1:
+                    y += 2.0
+                data.append({
+                    "unit": unit,
+                    "post": post,
+                    "outcome": y,
+                    "treated": int(is_treated),
+                })
+
+        df = pd.DataFrame(data)
+
+        import warnings
+        with warnings.catch_warnings(record=True):
+            warnings.simplefilter("always")
+            result = leave_one_out_test(
+                df,
+                outcome="outcome",
+                treatment="treated",
+                time="post",
+                unit="unit",
+            )
+
+        se = result.se
+        t_stat = result.t_stat
+
+        if not np.isfinite(se) or se == 0:
+            assert np.isnan(t_stat), (
+                f"LOO t_stat should be NaN when SE={se}, got {t_stat}"
+            )
+            ci = result.conf_int
+            assert np.isnan(ci[0]) and np.isnan(ci[1]), (
+                f"LOO conf_int should be (NaN, NaN) when SE={se}, got {ci}"
+            )
+
+    def test_permutation_tstat_consistency(self, simple_panel_data):
+        """permutation_test t_stat = effect/SE when SE is valid."""
+        result = permutation_test(
+            simple_panel_data,
+            outcome="outcome",
+            treatment="treated",
+            time="post",
+            unit="unit",
+            n_permutations=50,
+            seed=42,
+        )
+
+        se = result.se
+        t_stat = result.t_stat
+
+        if not np.isfinite(se) or se == 0:
+            assert np.isnan(t_stat), (
+                f"t_stat should be NaN when SE={se}, got {t_stat}"
+            )
+        else:
+            expected = result.original_effect / se
+            assert np.isclose(t_stat, expected), (
+                f"t_stat should be effect/SE, expected {expected}, got {t_stat}"
+            )
diff --git a/tests/test_sun_abraham.py b/tests/test_sun_abraham.py
diff --git a/tests/test_triple_diff.py b/tests/test_triple_diff.py