Skip to content

Commit f3d7427

Browse files
igerber and claude committed
Fix DoseResponseCurve survey df and strengthen P3 tests from PR #226 review (round 10)
- DoseResponseCurve: add df_survey field; to_dataframe() now passes the survey df to safe_inference() so exported p-values match fit-time inference instead of using the normal approximation
- Strengthen Bacon exact-weight test to assert that exact and approximate weights differ (not just finiteness)
- Strengthen ContinuousDiD dose-response test to assert that exported p-values match safe_inference(..., df=survey_df)

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
1 parent 2ce3325 commit f3d7427

3 files changed

Lines changed: 35 additions & 5 deletions

File tree

diff_diff/continuous_did.py

Lines changed: 4 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -397,6 +397,8 @@ def fit(
397397
unit_cohorts=precomp["unit_cohorts"],
398398
)
399399

400+
_survey_df = None # Set by analytical branch when survey is active
401+
400402
if len(post_gt) == 0:
401403
warnings.warn(
402404
"No post-treatment (g,t) cells available for aggregation. "
@@ -665,6 +667,7 @@ def fit(
665667
target="att",
666668
p_value=att_d_p,
667669
n_bootstrap=self.n_bootstrap,
670+
df_survey=_survey_df,
668671
)
669672
dose_response_acrt = DoseResponseCurve(
670673
dose_grid=dvals,
@@ -675,6 +678,7 @@ def fit(
675678
target="acrt",
676679
p_value=acrt_d_p,
677680
n_bootstrap=self.n_bootstrap,
681+
df_survey=_survey_df,
678682
)
679683

680684
# Strip bootstrap internals from gt_results

diff_diff/continuous_did_results.py

Lines changed: 2 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -45,6 +45,7 @@ class DoseResponseCurve:
4545
target: str
4646
p_value: Optional[np.ndarray] = None
4747
n_bootstrap: int = 0
48+
df_survey: Optional[int] = None
4849

4950
def to_dataframe(self) -> pd.DataFrame:
5051
"""Convert to DataFrame with dose, effect, se, CI, t_stat, p_value."""
@@ -60,7 +61,7 @@ def to_dataframe(self) -> pd.DataFrame:
6061
t_stat = np.full(n, np.nan)
6162
p_value = np.full(n, np.nan)
6263
for i in range(n):
63-
t_i, p_i, _ = safe_inference(self.effects[i], self.se[i])
64+
t_i, p_i, _ = safe_inference(self.effects[i], self.se[i], df=self.df_survey)
6465
t_stat[i] = t_i
6566
p_value[i] = p_i
6667
return pd.DataFrame(

tests/test_survey_phase3.py

Lines changed: 29 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -405,6 +405,25 @@ def test_exact_weights_survey_weighted(self, staggered_survey_data):
405405
assert len(r.comparisons) > 0
406406
for comp in r.comparisons:
407407
assert np.isfinite(comp.weight)
408+
# With non-uniform weights, exact weights should differ from
409+
# approximate weights (approximate uses n_k*(1-n_k)*Var(D))
410+
r_approx = BaconDecomposition(weights="approximate").fit(
411+
staggered_survey_data,
412+
"outcome",
413+
"unit",
414+
"time",
415+
"first_treat",
416+
survey_design=sd,
417+
)
418+
# At least one comparison weight should differ
419+
exact_weights = {(c.treated_group, c.control_group): c.weight for c in r.comparisons}
420+
approx_weights = {
421+
(c.treated_group, c.control_group): c.weight for c in r_approx.comparisons
422+
}
423+
common_keys = set(exact_weights) & set(approx_weights)
424+
assert len(common_keys) > 0
425+
diffs = [abs(exact_weights[k] - approx_weights[k]) for k in common_keys]
426+
assert max(diffs) > 1e-10, "Exact and approximate weights should differ"
408427

409428

410429
# =============================================================================
@@ -886,6 +905,7 @@ def test_sun_abraham_survey_df_regression(self, staggered_survey_data):
886905
def test_continuous_did_dose_response_survey_pvalue(self, continuous_survey_data):
887906
"""DoseResponseCurve.to_dataframe() p-values should use survey df."""
888907
from diff_diff import ContinuousDiD
908+
from diff_diff.utils import safe_inference
889909

890910
sd = SurveyDesign(weights="weight", strata="stratum")
891911
result = ContinuousDiD(n_bootstrap=0).fit(
@@ -899,8 +919,13 @@ def test_continuous_did_dose_response_survey_pvalue(self, continuous_survey_data
899919
)
900920
sm = result.survey_metadata
901921
assert sm is not None
902-
# Check that dose-response curve p-values are finite
922+
assert sm.df_survey is not None
923+
# Check that dose-response curve carries survey df
924+
assert result.dose_response_att.df_survey == sm.df_survey
925+
# Check exported p-values use survey df, not normal approx
903926
att_df = result.dose_response_att.to_dataframe()
904-
assert "p_value" in att_df.columns
905-
finite_p = att_df["p_value"].dropna()
906-
assert len(finite_p) > 0
927+
for i in range(min(3, len(att_df))):
928+
row = att_df.iloc[i]
929+
if np.isfinite(row["effect"]) and np.isfinite(row["se"]) and row["se"] > 0:
930+
_, expected_p, _ = safe_inference(row["effect"], row["se"], df=sm.df_survey)
931+
assert row["p_value"] == pytest.approx(expected_p, rel=1e-10)

0 commit comments

Comments
 (0)