Skip to content

Commit 56b2526

Browse files
igerberclaude
andcommitted
Fix CI review Round 5: non-binary metadata, suppress binary decomposition
P1: For non-binary L_max >= 1, use per-group N_l as n_switcher_cells (not binary N_S=0), count non-baseline obs as n_treated_obs, and suppress joiner/leaver decomposition (binary concept). Ensures DID_1 label pairs with consistent per-group metadata. P2: Add test_nonbinary_lmax1_renderer_contract asserting DID_1 label in __repr__/to_dataframe, n_switcher_cells > 0, joiners unavailable. P3: Update to_dataframe() docstring for L_max >= 1 contract. Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
1 parent bb74570 commit 56b2526

3 files changed

Lines changed: 81 additions & 22 deletions

File tree

diff_diff/chaisemartin_dhaultfoeuille.py

Lines changed: 43 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -1831,28 +1831,53 @@ def fit(
18311831
twfe_sigma_fe = twfe_diagnostic_payload.sigma_fe
18321832
twfe_beta_fe = twfe_diagnostic_payload.beta_fe
18331833

1834+
# When L_max >= 1 on non-binary data, the binary-only metadata
1835+
# (N_S, joiner/leaver counts, n_treated_obs) doesn't match the
1836+
# per-group DID_1 estimand. Use per-group metadata instead and
1837+
# suppress the joiner/leaver decomposition.
1838+
effective_N_S = N_S
1839+
effective_n_treated = n_treated_obs_post
1840+
effective_joiners_available = joiners_available
1841+
effective_leavers_available = leavers_available
1842+
if (
1843+
not is_binary
1844+
and L_max is not None
1845+
and L_max >= 1
1846+
and multi_horizon_dids is not None
1847+
and 1 in multi_horizon_dids
1848+
):
1849+
# Use horizon-1 eligible switcher count as the effective N_S
1850+
effective_N_S = multi_horizon_dids[1]["N_l"]
1851+
# Count all observations where treatment differs from baseline
1852+
effective_n_treated = int(
1853+
N_mat[D_mat != D_mat[:, 0:1]].sum()
1854+
) if D_mat.shape[1] > 1 else 0
1855+
# Suppress joiner/leaver decomposition for non-binary
1856+
effective_joiners_available = False
1857+
effective_leavers_available = False
1858+
18341859
results = ChaisemartinDHaultfoeuilleResults(
18351860
overall_att=effective_overall_att,
18361861
overall_se=effective_overall_se,
18371862
overall_t_stat=effective_overall_t,
18381863
overall_p_value=effective_overall_p,
18391864
overall_conf_int=effective_overall_ci,
1840-
joiners_att=joiners_att,
1841-
joiners_se=joiners_se,
1842-
joiners_t_stat=joiners_t,
1843-
joiners_p_value=joiners_p,
1844-
joiners_conf_int=joiners_ci,
1845-
n_joiner_cells=n_joiner_cells,
1846-
n_joiner_obs=n_joiner_obs,
1847-
joiners_available=joiners_available,
1848-
leavers_att=leavers_att,
1849-
leavers_se=leavers_se,
1850-
leavers_t_stat=leavers_t,
1851-
leavers_p_value=leavers_p,
1852-
leavers_conf_int=leavers_ci,
1853-
n_leaver_cells=n_leaver_cells,
1854-
n_leaver_obs=n_leaver_obs,
1855-
leavers_available=leavers_available,
1865+
joiners_att=joiners_att if effective_joiners_available else float("nan"),
1866+
joiners_se=joiners_se if effective_joiners_available else float("nan"),
1867+
joiners_t_stat=joiners_t if effective_joiners_available else float("nan"),
1868+
joiners_p_value=joiners_p if effective_joiners_available else float("nan"),
1869+
joiners_conf_int=joiners_ci if effective_joiners_available else (float("nan"), float("nan")),
1870+
n_joiner_cells=n_joiner_cells if effective_joiners_available else 0,
1871+
n_joiner_obs=n_joiner_obs if effective_joiners_available else 0,
1872+
joiners_available=effective_joiners_available,
1873+
leavers_att=leavers_att if effective_leavers_available else float("nan"),
1874+
leavers_se=leavers_se if effective_leavers_available else float("nan"),
1875+
leavers_t_stat=leavers_t if effective_leavers_available else float("nan"),
1876+
leavers_p_value=leavers_p if effective_leavers_available else float("nan"),
1877+
leavers_conf_int=leavers_ci if effective_leavers_available else (float("nan"), float("nan")),
1878+
n_leaver_cells=n_leaver_cells if effective_leavers_available else 0,
1879+
n_leaver_obs=n_leaver_obs if effective_leavers_available else 0,
1880+
leavers_available=effective_leavers_available,
18561881
placebo_effect=placebo_effect,
18571882
placebo_se=placebo_se,
18581883
placebo_t_stat=placebo_t,
@@ -1863,8 +1888,8 @@ def fit(
18631888
groups=all_groups,
18641889
time_periods=all_periods,
18651890
n_obs=n_obs_post,
1866-
n_treated_obs=n_treated_obs_post,
1867-
n_switcher_cells=N_S,
1891+
n_treated_obs=effective_n_treated,
1892+
n_switcher_cells=effective_N_S,
18681893
n_cohorts=n_cohorts,
18691894
n_groups_dropped_crossers=n_groups_dropped_crossers,
18701895
n_groups_dropped_singleton_baseline=n_groups_dropped_singleton_baseline,

diff_diff/chaisemartin_dhaultfoeuille_results.py

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -778,10 +778,11 @@ def to_dataframe(self, level: str = "overall") -> pd.DataFrame:
778778
level : str, default="overall"
779779
One of:
780780
781-
- ``"overall"``: single-row table with the overall ``DID_M``
782-
point estimate, SE, t-stat, p-value, CI bounds.
783-
- ``"joiners_leavers"``: three rows for ``DID_M``, ``DID_+``,
784-
and ``DID_-``.
781+
- ``"overall"``: single-row table with the overall estimand
782+
(``DID_M`` when ``L_max=None``, ``DID_1`` when ``L_max=1``,
783+
``delta`` when ``L_max >= 2``).
784+
- ``"joiners_leavers"``: up to three rows for the overall,
785+
``DID_+``, and ``DID_-`` (binary panels only).
785786
- ``"per_period"``: one row per time period with
786787
``did_plus_t``, ``did_minus_t``, switching cell counts, and
787788
the A11-zeroed flags.

tests/test_chaisemartin_dhaultfoeuille.py

Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2422,6 +2422,39 @@ def test_nonbinary_bootstrap(self, ci_params):
24222422
assert r.overall_se == es1["se"]
24232423
assert r.overall_p_value == es1["p_value"]
24242424

2425+
def test_nonbinary_lmax1_renderer_contract(self):
2426+
"""Non-binary L_max=1: summary/to_dataframe use DID_1 label and
2427+
suppress binary-only joiner/leaver decomposition."""
2428+
np.random.seed(77)
2429+
rows = []
2430+
for g in range(20):
2431+
for t in range(6):
2432+
d = 0 if t < 3 else 2
2433+
y = 10 + t + d + np.random.randn() * 0.3
2434+
rows.append({"group": g, "period": t, "treatment": d, "outcome": y})
2435+
for g in range(20, 40):
2436+
for t in range(6):
2437+
y = 10 + t + np.random.randn() * 0.3
2438+
rows.append({"group": g, "period": t, "treatment": 0, "outcome": y})
2439+
df = pd.DataFrame(rows)
2440+
est = ChaisemartinDHaultfoeuille(twfe_diagnostic=False)
2441+
with warnings.catch_warnings():
2442+
warnings.simplefilter("ignore")
2443+
r = est.fit(
2444+
df, outcome="outcome", group="group", time="period",
2445+
treatment="treatment", L_max=1,
2446+
)
2447+
# __repr__ should say DID_1
2448+
assert "DID_1" in repr(r)
2449+
# to_dataframe("overall") should label as DID_1
2450+
df_overall = r.to_dataframe("overall")
2451+
assert df_overall.iloc[0]["estimand"] == "DID_1"
2452+
# n_switcher_cells should be > 0 (from per-group path)
2453+
assert r.n_switcher_cells > 0
2454+
# Joiners/leavers unavailable for non-binary
2455+
assert r.joiners_available is False
2456+
assert r.leavers_available is False
2457+
24252458
def test_twfe_diagnostic_skipped_nonbinary(self):
24262459
"""TWFE diagnostic should be skipped (with warning) for non-binary."""
24272460
np.random.seed(77)

0 commit comments

Comments
 (0)