Skip to content

Commit 539c7d7

Browse files
igerber and claude committed
Reuse one TSL SurveyDesign (with FPC) across brand-awareness phases (P1)
CI re-review P1: `bench_brand_awareness_survey.py` declared the analytical TSL path with `SurveyDesign(weights, strata, psu, fpc, nest)` only in phase 2; phases 4 (multi-outcome), 6 (placebo), and 7 (event study + HonestDiD) built their own SurveyDesigns without `fpc`. That means a material share of the committed brand-awareness baselines timed a different variance path than the scenario doc declares.

Fix:
- One analytical `sd_tsl` SurveyDesign (strata + PSU + FPC + nest=True) is now constructed once at the top of `make_phases` and reused across phases 2, 4, 6, and 7. Phase 3 (replicate weights, JK1) is a different variance surface and correctly keeps its own design.
- Regenerated baselines for both backends.
- Regenerated findings tables via `gen_findings_tables.py`.

Narrative refreshed against the new tables:
- Brand-aware medium: on Python, JK1 now leads the multi-outcome loop by ~2.2x (was 1.9x in the previous rerun); on Rust, the multi-outcome loop and JK1 come in essentially tied. Medium is also where Python is slowest relative to Rust (~1.6x) - the full analytical TSL path with FPC exposes vectorization differences at that shape. Totals re-converge at large scale.
- Reversible dCDH: ~48-52% split under both backends (previously the Python heterogeneity refit edged out the main fit slightly).
- Scaling finding #5 retuned: Rust-only uplift is still the SDiD story; brand-aware medium now surfaces as a secondary, modest ~1.6x case rather than "within noise."

Still measurement only. No changes under diff_diff/ or rust/.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
1 parent 3d8c5eb commit 539c7d7

29 files changed

Lines changed: 353 additions & 353 deletions

benchmarks/speed_review/baselines/brand_awareness_survey_large_python.json

Lines changed: 11 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -2,47 +2,47 @@
22
"scenario": "brand_awareness_survey_large",
33
"backend": "python",
44
"has_rust_backend": false,
5-
"total_seconds": 0.9114786659999998,
5+
"total_seconds": 0.9466241249999998,
66
"memory": {
77
"available": true,
8-
"start_mb": 192.66,
9-
"peak_mb": 335.36,
10-
"growth_mb": 142.7,
8+
"start_mb": 187.52,
9+
"peak_mb": 336.53,
10+
"growth_mb": 149.02,
1111
"sampler_interval_s": 0.01
1212
},
1313
"phases": {
1414
"1_naive_fit_no_survey_design": {
15-
"seconds": 0.01250487499999986,
15+
"seconds": 0.014438584000000088,
1616
"ok": true,
1717
"error": null
1818
},
1919
"2_tsl_strata_psu_fpc": {
20-
"seconds": 0.03581945799999997,
20+
"seconds": 0.02661404200000006,
2121
"ok": true,
2222
"error": null
2323
},
2424
"3_replicate_weights_jk1": {
25-
"seconds": 0.45216445799999994,
25+
"seconds": 0.4855631249999999,
2626
"ok": true,
2727
"error": null
2828
},
2929
"4_multi_outcome_loop_3_metrics": {
30-
"seconds": 0.22825654099999992,
30+
"seconds": 0.24716379199999983,
3131
"ok": true,
3232
"error": null
3333
},
3434
"5_check_parallel_trends": {
35-
"seconds": 0.03940495799999999,
35+
"seconds": 0.02261091699999973,
3636
"ok": true,
3737
"error": null
3838
},
3939
"6_placebo_refit_pre_period": {
40-
"seconds": 0.01122979199999996,
40+
"seconds": 0.01228116700000026,
4141
"ok": true,
4242
"error": null
4343
},
4444
"7_event_study_plus_honest_did": {
45-
"seconds": 0.13208041700000006,
45+
"seconds": 0.13794237500000017,
4646
"ok": true,
4747
"error": null
4848
}

benchmarks/speed_review/baselines/brand_awareness_survey_large_rust.json

Lines changed: 11 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -2,47 +2,47 @@
22
"scenario": "brand_awareness_survey_large",
33
"backend": "rust",
44
"has_rust_backend": true,
5-
"total_seconds": 0.813743375,
5+
"total_seconds": 0.9458203330000001,
66
"memory": {
77
"available": true,
8-
"start_mb": 192.84,
9-
"peak_mb": 336.91,
10-
"growth_mb": 144.06,
8+
"start_mb": 193.34,
9+
"peak_mb": 343.95,
10+
"growth_mb": 150.61,
1111
"sampler_interval_s": 0.01
1212
},
1313
"phases": {
1414
"1_naive_fit_no_survey_design": {
15-
"seconds": 0.013944833000000045,
15+
"seconds": 0.012478959000000067,
1616
"ok": true,
1717
"error": null
1818
},
1919
"2_tsl_strata_psu_fpc": {
20-
"seconds": 0.026042083000000105,
20+
"seconds": 0.029093875000000047,
2121
"ok": true,
2222
"error": null
2323
},
2424
"3_replicate_weights_jk1": {
25-
"seconds": 0.359846584,
25+
"seconds": 0.4777013749999999,
2626
"ok": true,
2727
"error": null
2828
},
2929
"4_multi_outcome_loop_3_metrics": {
30-
"seconds": 0.2136102500000001,
30+
"seconds": 0.24978320799999998,
3131
"ok": true,
3232
"error": null
3333
},
3434
"5_check_parallel_trends": {
35-
"seconds": 0.033997333000000296,
35+
"seconds": 0.020603166999999978,
3636
"ok": true,
3737
"error": null
3838
},
3939
"6_placebo_refit_pre_period": {
40-
"seconds": 0.026375041999999738,
40+
"seconds": 0.0116797919999998,
4141
"ok": true,
4242
"error": null
4343
},
4444
"7_event_study_plus_honest_did": {
45-
"seconds": 0.13991266599999985,
45+
"seconds": 0.14445204200000017,
4646
"ok": true,
4747
"error": null
4848
}

benchmarks/speed_review/baselines/brand_awareness_survey_medium_python.json

Lines changed: 11 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -2,47 +2,47 @@
22
"scenario": "brand_awareness_survey_medium",
33
"backend": "python",
44
"has_rust_backend": false,
5-
"total_seconds": 0.521520459,
5+
"total_seconds": 0.790833584,
66
"memory": {
77
"available": true,
8-
"start_mb": 134.75,
9-
"peak_mb": 182.52,
10-
"growth_mb": 47.77,
8+
"start_mb": 133.86,
9+
"peak_mb": 186.22,
10+
"growth_mb": 52.36,
1111
"sampler_interval_s": 0.01
1212
},
1313
"phases": {
1414
"1_naive_fit_no_survey_design": {
15-
"seconds": 0.01229520900000003,
15+
"seconds": 0.014936249999999984,
1616
"ok": true,
1717
"error": null
1818
},
1919
"2_tsl_strata_psu_fpc": {
20-
"seconds": 0.03135670800000001,
20+
"seconds": 0.03534024999999996,
2121
"ok": true,
2222
"error": null
2323
},
2424
"3_replicate_weights_jk1": {
25-
"seconds": 0.1716489579999999,
25+
"seconds": 0.40949133400000004,
2626
"ok": true,
2727
"error": null
2828
},
2929
"4_multi_outcome_loop_3_metrics": {
30-
"seconds": 0.09223670900000003,
30+
"seconds": 0.18262604100000002,
3131
"ok": true,
3232
"error": null
3333
},
3434
"5_check_parallel_trends": {
35-
"seconds": 0.029447917000000157,
35+
"seconds": 0.03471770800000007,
3636
"ok": true,
3737
"error": null
3838
},
3939
"6_placebo_refit_pre_period": {
40-
"seconds": 0.05062770800000016,
40+
"seconds": 0.0515672920000001,
4141
"ok": true,
4242
"error": null
4343
},
4444
"7_event_study_plus_honest_did": {
45-
"seconds": 0.13389041700000015,
45+
"seconds": 0.06213379099999994,
4646
"ok": true,
4747
"error": null
4848
}

benchmarks/speed_review/baselines/brand_awareness_survey_medium_rust.json

Lines changed: 11 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -2,47 +2,47 @@
22
"scenario": "brand_awareness_survey_medium",
33
"backend": "rust",
44
"has_rust_backend": true,
5-
"total_seconds": 0.491930875,
5+
"total_seconds": 0.49272766700000004,
66
"memory": {
77
"available": true,
8-
"start_mb": 133.44,
9-
"peak_mb": 186.23,
10-
"growth_mb": 52.8,
8+
"start_mb": 136.12,
9+
"peak_mb": 187.19,
10+
"growth_mb": 51.06,
1111
"sampler_interval_s": 0.01
1212
},
1313
"phases": {
1414
"1_naive_fit_no_survey_design": {
15-
"seconds": 0.011377042000000004,
15+
"seconds": 0.01159325,
1616
"ok": true,
1717
"error": null
1818
},
1919
"2_tsl_strata_psu_fpc": {
20-
"seconds": 0.034086916999999994,
20+
"seconds": 0.03190720899999999,
2121
"ok": true,
2222
"error": null
2323
},
2424
"3_replicate_weights_jk1": {
25-
"seconds": 0.13685675000000008,
25+
"seconds": 0.13057945900000012,
2626
"ok": true,
2727
"error": null
2828
},
2929
"4_multi_outcome_loop_3_metrics": {
30-
"seconds": 0.16081833299999992,
30+
"seconds": 0.13259537499999996,
3131
"ok": true,
3232
"error": null
3333
},
3434
"5_check_parallel_trends": {
35-
"seconds": 0.027104291999999974,
35+
"seconds": 0.034756790999999954,
3636
"ok": true,
3737
"error": null
3838
},
3939
"6_placebo_refit_pre_period": {
40-
"seconds": 0.05256474999999994,
40+
"seconds": 0.07622437500000001,
4141
"ok": true,
4242
"error": null
4343
},
4444
"7_event_study_plus_honest_did": {
45-
"seconds": 0.06910883300000004,
45+
"seconds": 0.07503525,
4646
"ok": true,
4747
"error": null
4848
}

benchmarks/speed_review/baselines/brand_awareness_survey_small_python.json

Lines changed: 11 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -2,47 +2,47 @@
22
"scenario": "brand_awareness_survey_small",
33
"backend": "python",
44
"has_rust_backend": false,
5-
"total_seconds": 0.21071716699999998,
5+
"total_seconds": 0.21868924999999995,
66
"memory": {
77
"available": true,
8-
"start_mb": 116.28,
9-
"peak_mb": 128.09,
10-
"growth_mb": 11.81,
8+
"start_mb": 115.31,
9+
"peak_mb": 127.31,
10+
"growth_mb": 12.0,
1111
"sampler_interval_s": 0.01
1212
},
1313
"phases": {
1414
"1_naive_fit_no_survey_design": {
15-
"seconds": 0.0017718749999999783,
15+
"seconds": 0.002108082999999983,
1616
"ok": true,
1717
"error": null
1818
},
1919
"2_tsl_strata_psu_fpc": {
20-
"seconds": 0.005618791999999928,
20+
"seconds": 0.00682429200000001,
2121
"ok": true,
2222
"error": null
2323
},
2424
"3_replicate_weights_jk1": {
25-
"seconds": 0.017142625000000078,
25+
"seconds": 0.024697250000000004,
2626
"ok": true,
2727
"error": null
2828
},
2929
"4_multi_outcome_loop_3_metrics": {
30-
"seconds": 0.06763025,
30+
"seconds": 0.05683933299999999,
3131
"ok": true,
3232
"error": null
3333
},
3434
"5_check_parallel_trends": {
35-
"seconds": 0.00958991599999992,
35+
"seconds": 0.009950499999999973,
3636
"ok": true,
3737
"error": null
3838
},
3939
"6_placebo_refit_pre_period": {
40-
"seconds": 0.02613770900000001,
40+
"seconds": 0.028082541999999933,
4141
"ok": true,
4242
"error": null
4343
},
4444
"7_event_study_plus_honest_did": {
45-
"seconds": 0.08281808300000004,
45+
"seconds": 0.09016795900000008,
4646
"ok": true,
4747
"error": null
4848
}

benchmarks/speed_review/baselines/brand_awareness_survey_small_rust.json

Lines changed: 11 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -2,47 +2,47 @@
22
"scenario": "brand_awareness_survey_small",
33
"backend": "rust",
44
"has_rust_backend": true,
5-
"total_seconds": 0.193722167,
5+
"total_seconds": 0.22938095800000002,
66
"memory": {
77
"available": true,
8-
"start_mb": 115.44,
9-
"peak_mb": 128.27,
10-
"growth_mb": 12.83,
8+
"start_mb": 115.72,
9+
"peak_mb": 129.34,
10+
"growth_mb": 13.62,
1111
"sampler_interval_s": 0.01
1212
},
1313
"phases": {
1414
"1_naive_fit_no_survey_design": {
15-
"seconds": 0.0018566250000000561,
15+
"seconds": 0.002027083000000096,
1616
"ok": true,
1717
"error": null
1818
},
1919
"2_tsl_strata_psu_fpc": {
20-
"seconds": 0.005901209000000018,
20+
"seconds": 0.006830167000000054,
2121
"ok": true,
2222
"error": null
2323
},
2424
"3_replicate_weights_jk1": {
25-
"seconds": 0.017941708999999917,
25+
"seconds": 0.028238708000000057,
2626
"ok": true,
2727
"error": null
2828
},
2929
"4_multi_outcome_loop_3_metrics": {
30-
"seconds": 0.05830100000000005,
30+
"seconds": 0.06599037499999993,
3131
"ok": true,
3232
"error": null
3333
},
3434
"5_check_parallel_trends": {
35-
"seconds": 0.009343709000000033,
35+
"seconds": 0.010059291999999997,
3636
"ok": true,
3737
"error": null
3838
},
3939
"6_placebo_refit_pre_period": {
40-
"seconds": 0.02651229200000005,
40+
"seconds": 0.028069124999999917,
4141
"ok": true,
4242
"error": null
4343
},
4444
"7_event_study_plus_honest_did": {
45-
"seconds": 0.07384633299999999,
45+
"seconds": 0.08814829099999999,
4646
"ok": true,
4747
"error": null
4848
}

0 commit comments

Comments
 (0)