@@ -1943,3 +1943,176 @@ def test_matching_weights_no_warning(self):
19431943 with warnings .catch_warnings ():
19441944 warnings .simplefilter ("error" )
19451945 reg .fit (X , y )
1946+
1947+
class TestRound7Fixes:
    """Tests for round-7 review fixes (PR #218).

    Two groups of checks live here:

    * cluster IDs passed via ``cluster=`` / ``cluster_ids=`` are used as the
      PSU when the survey design carries weights but no PSU, and
    * ``weight_type`` / ``weights`` validation in ``solve_ols`` and
      ``LinearRegression``.
    """

    @staticmethod
    def _make_cluster_data(seed=700):
        """Create 2-period DiD data with 10 clusters of 5 obs each.

        Clusters 5-9 are treated and receive a +3.0 shift in period 1.
        Every row carries a cluster-level weight ``w = 1.0 + 0.2 * cluster``.

        Parameters
        ----------
        seed : int
            Seed for NumPy's global random state.

        Returns
        -------
        pandas.DataFrame
            100 rows with columns ``unit``, ``period``, ``treated``, ``y``,
            ``cluster_id`` and ``w``.
        """
        np.random.seed(seed)
        n_clusters = 10
        obs_per_cluster = 5
        rows = []
        for c in range(n_clusters):
            is_treated = c >= 5
            for i in range(obs_per_cluster):
                for period in [0, 1]:
                    y = 10.0 + c * 0.3 + np.random.randn() * 0.5
                    if period == 1 and is_treated:
                        y += 3.0
                    rows.append({
                        "unit": c * obs_per_cluster + i,
                        "period": period,
                        "treated": int(is_treated),
                        "y": y,
                        "cluster_id": c,
                        "w": 1.0 + 0.2 * c,
                    })
        return pd.DataFrame(rows)

    @staticmethod
    def _make_regression_inputs(n=20, seed=702):
        """Build a small, reproducible regression problem with unit weights.

        A private ``RandomState`` is used so the inputs do not depend on (or
        disturb) NumPy's global random state, which keeps the validation
        tests independent of test execution order.

        Parameters
        ----------
        n : int
            Number of observations.
        seed : int
            Seed for the private random generator.

        Returns
        -------
        tuple of numpy.ndarray
            ``(X, y, w)``: an ``(n, 2)`` design matrix whose first column is
            ones, a length-``n`` response, and a fresh length-``n`` array of
            ones that callers may mutate to inject an invalid weight.
        """
        rng = np.random.RandomState(seed)
        X = np.column_stack([np.ones(n), rng.randn(n)])
        y = rng.randn(n)
        return X, y, np.ones(n)

    @staticmethod
    def _assert_injection_matches_explicit(result_inject, result_explicit):
        """Check an injected-PSU fit against the explicit-PSU reference fit.

        Asserts that the standard errors agree and that the injected fit
        reports 10 PSUs and 9 survey degrees of freedom, matching the 10
        clusters produced by ``_make_cluster_data``.
        """
        # NOTE(review): assert_allclose also applies its default rtol=1e-7,
        # so the effective tolerance is atol + rtol * |desired|, not 1e-12.
        np.testing.assert_allclose(result_inject.se, result_explicit.se, atol=1e-12)
        assert result_inject.survey_metadata.n_psu == 10
        assert result_inject.survey_metadata.df_survey == 9

    def test_cluster_injected_as_psu_did(self):
        """Cluster IDs injected as PSU produce identical SEs to explicit PSU."""
        data = self._make_cluster_data()

        # Fit with cluster= and weights-only survey (no PSU)
        result_inject = DifferenceInDifferences(cluster="cluster_id").fit(
            data, "y", "treated", "period",
            survey_design=SurveyDesign(weights="w"),
        )

        # Fit with explicit PSU in survey design
        result_explicit = DifferenceInDifferences(cluster="cluster_id").fit(
            data, "y", "treated", "period",
            survey_design=SurveyDesign(weights="w", psu="cluster_id"),
        )

        self._assert_injection_matches_explicit(result_inject, result_explicit)

    def test_cluster_injected_as_psu_twfe(self):
        """TWFE: cluster IDs injected as PSU produce identical SEs to explicit PSU."""
        data = self._make_cluster_data()

        # Fit with cluster= and weights-only survey (no PSU)
        result_inject = TwoWayFixedEffects(cluster="cluster_id").fit(
            data, "y", "treated", "period", unit="unit",
            survey_design=SurveyDesign(weights="w"),
        )

        # Fit with explicit PSU in survey design
        result_explicit = TwoWayFixedEffects(cluster="cluster_id").fit(
            data, "y", "treated", "period", unit="unit",
            survey_design=SurveyDesign(weights="w", psu="cluster_id"),
        )

        self._assert_injection_matches_explicit(result_inject, result_explicit)

    def test_cluster_injected_as_psu_linear_regression(self):
        """Standalone LinearRegression: cluster injection matches explicit PSU."""
        np.random.seed(701)
        n = 50
        cluster_ids = np.repeat(np.arange(10), 5)
        X = np.column_stack([np.ones(n), np.random.randn(n)])
        y = 1.0 + X[:, 1] * 0.5 + np.random.randn(n) * 0.4
        weights = np.random.uniform(0.5, 3.0, n)

        # No PSU in resolved design
        resolved_no_psu = ResolvedSurveyDesign(
            weights=weights, weight_type="pweight",
            strata=None, psu=None, fpc=None,
            n_strata=0, n_psu=0, lonely_psu="remove",
        )
        reg_inject = LinearRegression(
            include_intercept=False, cluster_ids=cluster_ids,
            survey_design=resolved_no_psu,
        )
        reg_inject.fit(X, y)

        # Explicit PSU: integer codes for the same cluster labels
        codes, uniques = pd.factorize(cluster_ids)
        resolved_psu = ResolvedSurveyDesign(
            weights=weights, weight_type="pweight",
            strata=None, psu=codes, fpc=None,
            n_strata=0, n_psu=len(uniques), lonely_psu="remove",
        )
        reg_explicit = LinearRegression(
            include_intercept=False, cluster_ids=cluster_ids,
            survey_design=resolved_psu,
        )
        reg_explicit.fit(X, y)

        # NOTE(review): the default rtol=1e-7 applies in addition to atol.
        np.testing.assert_allclose(reg_inject.vcov_, reg_explicit.vcov_, atol=1e-12)

    def test_cluster_injection_no_effect_when_psu_present(self):
        """When PSU is already present, _inject_cluster_as_psu is a no-op."""
        from diff_diff.survey import _inject_cluster_as_psu

        existing_psu = np.array([0, 0, 1, 1, 2, 2])
        resolved = ResolvedSurveyDesign(
            weights=np.ones(6), weight_type="pweight",
            strata=None, psu=existing_psu, fpc=None,
            n_strata=0, n_psu=3, lonely_psu="remove",
        )
        result = _inject_cluster_as_psu(resolved, np.array([10, 10, 20, 20, 30, 30]))
        assert result is resolved  # Same object — no replacement

    def test_invalid_weight_type_raises(self):
        """Invalid weight_type raises ValueError in solve_ols and LinearRegression."""
        X, y, w = self._make_regression_inputs()

        # "pwieght" is a deliberate misspelling of a weight type.
        with pytest.raises(ValueError, match="weight_type must be one of"):
            solve_ols(X, y, weights=w, weight_type="pwieght")

        with pytest.raises(ValueError, match="weight_type must be one of"):
            LinearRegression(weights=w, weight_type="bad").fit(X, y)

    def test_nan_weights_raises(self):
        """NaN weights raise ValueError."""
        X, y, w = self._make_regression_inputs()
        w[5] = np.nan

        with pytest.raises(ValueError, match="NaN"):
            solve_ols(X, y, weights=w)

    def test_negative_weights_raises(self):
        """Negative weights raise ValueError."""
        X, y, w = self._make_regression_inputs()
        w[3] = -0.5

        with pytest.raises(ValueError, match="non-negative"):
            solve_ols(X, y, weights=w)

    def test_inf_weights_raises(self):
        """Inf weights raise ValueError."""
        X, y, w = self._make_regression_inputs()
        w[0] = np.inf

        with pytest.raises(ValueError, match="Inf"):
            solve_ols(X, y, weights=w)

    def test_zero_weights_accepted(self):
        """Zero weights are accepted (intentional divergence from SurveyDesign)."""
        X, y, w = self._make_regression_inputs()
        w[0] = 0.0

        # Should NOT raise
        coef, _resid, _vcov = solve_ols(X, y, weights=w)
        assert coef is not None
0 commit comments