Skip to content

Commit f30c121

Browse files
igerber and claude committed
Address PR #356 CI review round 9 (1 P1 + 1 P2 semantic)
Rebased onto current main (resolved CHANGELOG.md conflict: the by_path bullet from PR #355 and the profile_panel/autonomous-guide bullet from this PR now live side-by-side under [Unreleased]).

has_always_treated now has binary-only semantics:

- For binary treatment (absorbing or non-absorbing): unit_min == 1 means the unit is treated in every observed period (no pre-treatment information in the DiD sense).
- For continuous treatment: always False. Pre-treatment periods on continuous DiD are determined by the separate `first_treat` column supplied to `ContinuousDiD.fit`, not by whether the dose is positive. A unit with a constant positive dose can still have well-defined pre-treatment periods, so flagging it as "always-treated / no pre-treatment information" was factually wrong and triggered the misleading `has_always_treated_units` alert on valid continuous panels.
- Categorical: False by construction.

Guide §2 has_always_treated field doc updated to state the binary-only semantics explicitly, with a note about `first_treat`.

Tests:

- New: test_continuous_positive_dose_does_not_fire_has_always_treated asserts has_always_treated=False AND the alert does not fire on a constant-positive-dose continuous panel.
- Existing test_continuous_zero_dose_controls_flag_has_never_treated updated: has_always_treated expected to be False (was True under the old semantics).

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
1 parent 6a1372b commit f30c121

3 files changed

Lines changed: 53 additions & 11 deletions

File tree

diff_diff/guides/llms-autonomous.txt

Lines changed: 9 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -114,10 +114,15 @@ view. Every field below appears as a top-level key in that dict.
114114
is not yet implemented). Preferred-but-optional by
115115
`CallawaySantAnna` and `ChaisemartinDHaultfoeuille`. Always `False`
116116
for `"categorical"`.
117-
- **`has_always_treated: bool`** - at least one unit has
118-
strictly-positive treatment in every observed non-NaN row. Such units
119-
provide no pre-treatment identification and are dropped by most
120-
estimators. Always `False` for `"categorical"`.
117+
- **`has_always_treated: bool`** - at least one binary-treatment
118+
unit has `treatment == 1` in every observed non-NaN row (no
119+
pre-treatment information for that unit in the DiD sense).
120+
Binary-only semantics: for `"continuous"` panels this field is
121+
always `False` because pre-treatment periods are determined by the
122+
`first_treat` column supplied to `ContinuousDiD.fit()`, not by
123+
whether the dose is positive - a unit with a constant positive dose
124+
can still have well-defined pre-treatment periods. Always `False`
125+
for `"categorical"` too.
121126
- **`treatment_varies_within_unit: bool`** - at least one unit has more
122127
than one distinct non-NaN treatment value across its observed rows.
123128
For binary panels this is normally `True` (pre vs. post the adoption

diff_diff/profile.py

Lines changed: 14 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -353,16 +353,25 @@ def _classify_treatment(
353353
if n_distinct == 0:
354354
return ("categorical", False, {}, False, False, None, None)
355355

356-
# Generic never-/always-treated semantics (applies to both binary
357-
# and continuous numeric treatment): "never-treated" means the unit
358-
# has treatment == 0 in every observed non-NaN row; "always-treated"
359-
# means treatment > 0 in every observed non-NaN row.
356+
# has_never_treated has a single well-defined meaning across binary
357+
# and continuous numeric treatment: some unit has treatment == 0 in
358+
# every observed non-NaN row. For binary this is the clean-control
359+
# group; for continuous this is the zero-dose control required by
360+
# ContinuousDiD (P(D=0) > 0).
360361
unit_max = df.groupby(unit)[treatment].max().to_numpy()
361362
unit_min = df.groupby(unit)[treatment].min().to_numpy()
362363
has_never_treated = bool(np.any(unit_max == 0))
363-
has_always_treated = bool(np.any(unit_min > 0))
364364

365365
is_binary_valued = values_set <= {0, 1, 0.0, 1.0}
366+
# has_always_treated has binary-only semantics: "unit is treated in
367+
# every observed period" = unit_min == 1 on a binary panel (no
368+
# pre-treatment information). For continuous panels, positive dose
369+
# throughout does not mean "always treated in the DiD sense"
370+
# (pre-treatment periods are determined by `first_treat`, not by
371+
# whether the dose is positive), so this field is False for
372+
# continuous / categorical types.
373+
has_always_treated = is_binary_valued and bool(np.any(unit_min == 1))
374+
366375
if not is_binary_valued:
367376
return (
368377
"continuous",

tests/test_profile_panel.py

Lines changed: 30 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -129,6 +129,32 @@ def test_binary_absorbing_varies_within_unit():
129129
assert profile.treatment_varies_within_unit is True
130130

131131

132+
def test_continuous_positive_dose_does_not_fire_has_always_treated():
133+
"""Valid ContinuousDiD panels have units with a constant positive
134+
dose across all periods AND well-defined pre-treatment periods
135+
(via a separate `first_treat` column). `has_always_treated` has
136+
binary-only semantics, so it must be False on continuous panels
137+
regardless of dose positivity. Previously the field conflated
138+
"positive dose throughout" with "always treated in the DiD sense",
139+
which fired the misleading `has_always_treated_units` alert on
140+
valid continuous-DiD panels."""
141+
rng = np.random.default_rng(0)
142+
rows = []
143+
for u in range(1, 21):
144+
dose = 0.0 if u <= 5 else 2.5
145+
for t in range(4):
146+
rows.append({"u": u, "t": t, "tr": dose, "y": rng.normal()})
147+
df = pd.DataFrame(rows)
148+
profile = profile_panel(df, unit="u", time="t", treatment="tr", outcome="y")
149+
assert profile.treatment_type == "continuous"
150+
assert profile.has_never_treated is True
151+
assert profile.has_always_treated is False, (
152+
"has_always_treated must be False on continuous panels regardless "
153+
"of dose positivity (binary-only semantics)"
154+
)
155+
assert "has_always_treated_units" not in _alert_codes(profile)
156+
157+
132158
def test_categorical_treatment_object_dtype():
133159
rows = []
134160
for u in range(1, 11):
@@ -386,7 +412,9 @@ def test_reversal_through_nan_is_binary_non_absorbing():
386412
def test_continuous_zero_dose_controls_flag_has_never_treated():
387413
"""Continuous treatment with some zero-dose units must flag
388414
has_never_treated=True. Previously continuous panels hardcoded
389-
has_never_treated=False regardless of control availability."""
415+
has_never_treated=False regardless of control availability.
416+
has_always_treated has binary-only semantics and must remain
417+
False on continuous panels regardless of dose positivity."""
390418
rows = []
391419
rng = np.random.default_rng(0)
392420
for u in range(1, 21):
@@ -397,7 +425,7 @@ def test_continuous_zero_dose_controls_flag_has_never_treated():
397425
profile = profile_panel(df, unit="u", time="t", treatment="tr", outcome="y")
398426
assert profile.treatment_type == "continuous"
399427
assert profile.has_never_treated is True
400-
assert profile.has_always_treated is True
428+
assert profile.has_always_treated is False
401429

402430

403431
def test_guide_api_strings_resolve_against_public_api():

0 commit comments

Comments
 (0)