@@ -681,6 +681,83 @@ def test_very_large_M(self, mock_multiperiod_results):
681681 assert isinstance (results , HonestDiDResults )
682682 assert results .ci_width > 0
683683
def test_callaway_santanna_universal_base_period(self):
    """Check HonestDiD on Callaway-Sant'Anna output fitted with a universal base period.

    The test first confirms that the event study contains a reference
    entry at e=-1 whose standard error is NaN. It then runs a
    relative-magnitude HonestDiD fit on the same results and requires
    both confidence-interval endpoints to be finite, which is the
    observable sign that the NaN reference entry did not leak into the
    sensitivity analysis.
    """
    from diff_diff import CallawaySantAnna, generate_staggered_data

    # Staggered-adoption panel, estimated with the universal base period.
    panel = generate_staggered_data(n_units=200, n_periods=10, seed=42)
    fit_kwargs = {
        'outcome': 'outcome',
        'unit': 'unit',
        'time': 'period',
        'first_treat': 'first_treat',
        'aggregate': 'event_study',
    }
    cs_results = CallawaySantAnna(base_period="universal").fit(panel, **fit_kwargs)

    # Precondition: the reference period is reported and carries a NaN SE.
    assert -1 in cs_results.event_study_effects
    reference_entry = cs_results.event_study_effects[-1]
    assert np.isnan(reference_entry['se'])

    # The sensitivity analysis must run cleanly on these results.
    sensitivity = HonestDiD(method='relative_magnitude', M=1.0)
    bounds = sensitivity.fit(cs_results)

    # A usable result object with finite interval endpoints is required.
    assert isinstance(bounds, HonestDiDResults)
    for endpoint in (bounds.ci_lb, bounds.ci_ub):
        assert np.isfinite(endpoint)
716+
def test_max_pre_violation_excludes_reference_period(self):
    """Test that the max pre-violation is driven by real pre-periods only.

    With a universal base period, the event study reports a reference
    entry at e=-1 with n_groups=0. That entry is a normalization
    constraint rather than an estimate, so the pre-periods handed to
    ``_estimate_max_pre_violation`` here are the negative event times
    with n_groups > 0. The resulting max violation must be strictly
    positive, i.e. it reflects estimated pre-period coefficients and
    not the reference period's fixed zero effect.

    The test asserts that at least one real pre-period exists instead of
    silently skipping the check, so it cannot pass vacuously.
    """
    from diff_diff import CallawaySantAnna, generate_staggered_data

    # Generate data and fit with universal base period
    data = generate_staggered_data(n_units=200, n_periods=10, seed=42)
    cs = CallawaySantAnna(base_period="universal")
    results = cs.fit(
        data,
        outcome='outcome',
        unit='unit',
        time='period',
        first_treat='first_treat',
        aggregate='event_study',
    )

    # Verify reference period exists with n_groups=0
    effects = results.event_study_effects
    assert -1 in effects
    assert effects[-1]['n_groups'] == 0

    honest = HonestDiD(method='relative_magnitude', M=1.0)

    # Pre-periods excluding the reference entry (n_groups=0). Entries that
    # do not report n_groups are treated as real periods.
    real_pre_periods = [
        t for t, entry in effects.items()
        if t < 0 and entry.get('n_groups', 1) > 0
    ]

    # Fail loudly if the fixture yields no real pre-periods; otherwise the
    # assertion below would never run and the test would prove nothing.
    assert real_pre_periods, (
        "expected at least one pre-period with n_groups > 0"
    )

    # Max violation should reflect actual pre-period coefficients, which
    # are non-zero due to sampling variation, not the reference effect of 0.
    max_violation = honest._estimate_max_pre_violation(results, real_pre_periods)
    assert max_violation > 0, (
        "max_pre_violation should be > 0 when real pre-periods exist"
    )
760+
684761
685762# =============================================================================
686763# Tests for Visualization (without matplotlib)
0 commit comments