@@ -490,7 +490,7 @@ def test_no_strata_degeneracy(self):
490490 np .testing .assert_allclose (survey_vcov , oracle_vcov , atol = 1e-12 )
491491
492492 def test_weights_only_oracle (self ):
493- """Weights-only design: survey vcov matches hand-computed weighted HC1 ."""
493+ """Weights-only design: survey vcov uses implicit-PSU TSL ."""
494494 np .random .seed (81 )
495495 n = 60
496496 raw_weights = np .random .uniform (0.5 , 3.0 , n )
@@ -512,12 +512,17 @@ def test_weights_only_oracle(self):
512512 )
513513 survey_vcov = compute_survey_vcov (X , resid , resolved )
514514
515- # Correct weighted HC1: (X'WX)^{-1} * X' diag(w * e²) X * n/(n-k) * (X'WX)^{-1}
515+ # TSL with implicit per-observation PSUs:
516+ # meat = sum_i (s_i - s_bar)(s_i - s_bar)' * n/(n-1)
517+ # where s_i = w_i * X_i * e_i
516518 k = X .shape [1 ]
517519 XtWX = X .T @ (X * weights [:, np .newaxis ])
518520 XtWX_inv = np .linalg .inv (XtWX )
519- meat = np .dot (X .T , X * (weights * resid ** 2 )[:, np .newaxis ])
520- meat *= n / (n - k )
521+ scores = X * (weights * resid )[:, np .newaxis ]
522+ scores_mean = scores .mean (axis = 0 , keepdims = True )
523+ centered = scores - scores_mean
524+ adjustment = n / (n - 1 )
525+ meat = adjustment * (centered .T @ centered )
521526 oracle_vcov = XtWX_inv @ meat @ XtWX_inv
522527
523528 np .testing .assert_allclose (survey_vcov , oracle_vcov , atol = 1e-12 )
@@ -1462,7 +1467,7 @@ def test_linear_regression_weighted_rank_deficient_robust(self):
14621467 assert np .isfinite (vcov [i , i ]) and vcov [i , i ] > 0
14631468
14641469 def test_fweight_survey_oracle (self ):
1465- """fweight SurveyDesign: survey vcov matches expanded-data unweighted HC1 ."""
1470+ """fweight SurveyDesign: survey vcov uses implicit-PSU TSL ."""
14661471 np .random .seed (55 )
14671472 n = 30
14681473 X_base = np .column_stack ([np .ones (n ), np .random .randn (n )])
@@ -1485,17 +1490,17 @@ def test_fweight_survey_oracle(self):
14851490 )
14861491 survey_vcov = compute_survey_vcov (X_base , resid_fw , resolved )
14871492
1488- # Oracle: expand data and compute unweighted HC1
1489- X_exp = np . repeat ( X_base , freq . astype ( int ), axis = 0 )
1490- y_exp = np . repeat ( y_base , freq . astype ( int ))
1491- coef_exp , resid_exp , _ = solve_ols ( X_exp , y_exp )
1492- n_exp = X_exp . shape [ 0 ]
1493- k = X_exp . shape [ 1 ]
1494- XtX = X_exp . T @ X_exp
1495- XtX_inv = np . linalg . inv ( XtX )
1496- meat = np . dot ( X_exp . T , X_exp * ( resid_exp ** 2 )[:, np . newaxis ] )
1497- meat *= n_exp / ( n_exp - k )
1498- oracle_vcov = XtX_inv @ meat @ XtX_inv
1493+ # Oracle: TSL with implicit per-observation PSUs
1494+ # scores = w_i * X_i * e_i, meat = n/(n-1) * (scores - mean)' (scores - mean )
1495+ k = X_base . shape [ 1 ]
1496+ XtWX = X_base . T @ ( X_base * freq [:, np . newaxis ] )
1497+ XtWX_inv = np . linalg . inv ( XtWX )
1498+ scores = X_base * ( freq * resid_fw )[:, np . newaxis ]
1499+ scores_mean = scores . mean ( axis = 0 , keepdims = True )
1500+ centered = scores - scores_mean
1501+ adjustment = n / ( n - 1 )
1502+ meat = adjustment * ( centered . T @ centered )
1503+ oracle_vcov = XtWX_inv @ meat @ XtWX_inv
14991504
15001505 np .testing .assert_allclose (survey_vcov , oracle_vcov , atol = 1e-10 )
15011506
@@ -1832,3 +1837,109 @@ def test_resolve_warns_single_psu_unstratified(self):
18321837 )
18331838 with pytest .warns (UserWarning , match = r"Only 1 PSU" ):
18341839 design .resolve (df )
1840+
1841+
1842+ class TestRound6Fixes :
1843+ """Tests for round-6 review fixes (PR #218)."""
1844+
1845+ def test_weights_only_matches_explicit_individual_psu (self ):
1846+ """Weights-only design produces identical vcov to explicit psu=arange(n)."""
1847+ np .random .seed (600 )
1848+ n = 50
1849+ X = np .column_stack ([np .ones (n ), np .random .randn (n )])
1850+ y = 1.0 + X [:, 1 ] * 0.5 + np .random .randn (n ) * 0.4
1851+ weights = np .random .uniform (0.5 , 3.0 , n )
1852+ weights = weights * (n / np .sum (weights ))
1853+
1854+ coef , resid , _ = solve_ols (X , y , weights = weights , weight_type = "pweight" )
1855+
1856+ # Weights-only design (no PSU)
1857+ resolved_wo = ResolvedSurveyDesign (
1858+ weights = weights ,
1859+ weight_type = "pweight" ,
1860+ strata = None ,
1861+ psu = None ,
1862+ fpc = None ,
1863+ n_strata = 0 ,
1864+ n_psu = 0 ,
1865+ lonely_psu = "remove" ,
1866+ )
1867+ vcov_wo = compute_survey_vcov (X , resid , resolved_wo )
1868+
1869+ # Explicit individual-PSU design
1870+ resolved_psu = ResolvedSurveyDesign (
1871+ weights = weights ,
1872+ weight_type = "pweight" ,
1873+ strata = None ,
1874+ psu = np .arange (n ),
1875+ fpc = None ,
1876+ n_strata = 0 ,
1877+ n_psu = n ,
1878+ lonely_psu = "remove" ,
1879+ )
1880+ vcov_psu = compute_survey_vcov (X , resid , resolved_psu )
1881+
1882+ np .testing .assert_allclose (vcov_wo , vcov_psu , atol = 1e-12 )
1883+
1884+ def test_conflicting_weights_warns_and_uses_survey (self ):
1885+ """Explicit weights differing from survey_design triggers warning."""
1886+ np .random .seed (601 )
1887+ n = 40
1888+ X = np .random .randn (n , 2 )
1889+ y = X @ [1.0 , 0.5 ] + np .random .randn (n ) * 0.3
1890+ survey_weights = np .random .uniform (0.5 , 3.0 , n )
1891+ different_weights = np .random .uniform (1.0 , 5.0 , n )
1892+
1893+ resolved = ResolvedSurveyDesign (
1894+ weights = survey_weights ,
1895+ weight_type = "pweight" ,
1896+ strata = None ,
1897+ psu = None ,
1898+ fpc = None ,
1899+ n_strata = 0 ,
1900+ n_psu = 0 ,
1901+ lonely_psu = "remove" ,
1902+ )
1903+
1904+ # Fit with conflicting explicit weights — should warn
1905+ reg_conflict = LinearRegression (
1906+ weights = different_weights , survey_design = resolved
1907+ )
1908+ with pytest .warns (UserWarning , match = "differ from survey_design" ):
1909+ reg_conflict .fit (X , y )
1910+
1911+ # Fit with only survey_design — no explicit weights
1912+ reg_survey = LinearRegression (survey_design = resolved )
1913+ reg_survey .fit (X , y )
1914+
1915+ np .testing .assert_allclose (
1916+ reg_conflict .coefficients_ , reg_survey .coefficients_ , atol = 1e-14
1917+ )
1918+ np .testing .assert_allclose (
1919+ reg_conflict .vcov_ , reg_survey .vcov_ , atol = 1e-14
1920+ )
1921+
1922+ def test_matching_weights_no_warning (self ):
1923+ """Same array object passed as weights and in survey_design: no warning."""
1924+ np .random .seed (602 )
1925+ n = 40
1926+ X = np .random .randn (n , 2 )
1927+ y = X @ [1.0 , 0.5 ] + np .random .randn (n ) * 0.3
1928+ weights = np .random .uniform (0.5 , 3.0 , n )
1929+
1930+ resolved = ResolvedSurveyDesign (
1931+ weights = weights ,
1932+ weight_type = "pweight" ,
1933+ strata = None ,
1934+ psu = None ,
1935+ fpc = None ,
1936+ n_strata = 0 ,
1937+ n_psu = 0 ,
1938+ lonely_psu = "remove" ,
1939+ )
1940+
1941+ # Same array object — should NOT warn
1942+ reg = LinearRegression (weights = weights , survey_design = resolved )
1943+ with warnings .catch_warnings ():
1944+ warnings .simplefilter ("error" )
1945+ reg .fit (X , y )
0 commit comments