@@ -512,12 +512,11 @@ def test_weights_only_oracle(self):
512512 )
513513 survey_vcov = compute_survey_vcov (X , resid , resolved )
514514
515- # Hand-compute weighted HC1: (X'WX)^{-1} * (sum w_i^2 X_i X_i' e_i^2) * n/(n-k) * (X'WX)^{-1}
515+ # Correct weighted HC1: (X'WX)^{-1} * X' diag(w * e²) X * n/(n-k) * (X'WX)^{-1}
516516 k = X .shape [1 ]
517517 XtWX = X .T @ (X * weights [:, np .newaxis ])
518518 XtWX_inv = np .linalg .inv (XtWX )
519- scores = X * (weights * resid )[:, np .newaxis ]
520- meat = scores .T @ scores
519+ meat = np .dot (X .T , X * (weights * resid ** 2 )[:, np .newaxis ])
521520 meat *= n / (n - k )
522521 oracle_vcov = XtWX_inv @ meat @ XtWX_inv
523522
@@ -1462,6 +1461,147 @@ def test_linear_regression_weighted_rank_deficient_robust(self):
14621461 for i in kept :
14631462 assert np .isfinite (vcov [i , i ]) and vcov [i , i ] > 0
14641463
def test_fweight_survey_oracle(self):
    """Frequency-weighted survey vcov equals HC1 on the row-expanded data.

    Fits with ``weight_type="fweight"``, computes the weights-only survey
    vcov from those residuals, and compares it with an unweighted HC1
    sandwich built by hand after repeating every row ``freq[i]`` times.
    """
    np.random.seed(55)
    n_obs = 30
    # The order of the random draws below fixes the seeded data set.
    X_base = np.column_stack([np.ones(n_obs), np.random.randn(n_obs)])
    y_base = 2.0 + X_base[:, 1] * 1.5 + np.random.randn(n_obs) * 0.3
    freq = np.random.choice([1, 2, 3], n_obs).astype(float)

    # Survey side: fweight fit, then a design that carries weights only
    # (no strata, PSU or FPC).
    _, resid_fw, _ = solve_ols(
        X_base, y_base, weights=freq, weight_type="fweight"
    )
    design = ResolvedSurveyDesign(
        weights=freq,
        weight_type="fweight",
        strata=None,
        psu=None,
        fpc=None,
        n_strata=0,
        n_psu=0,
        lonely_psu="remove",
    )
    survey_vcov = compute_survey_vcov(X_base, resid_fw, design)

    # Oracle side: physically repeat each row freq[i] times and refit
    # without weights.
    reps = freq.astype(int)
    X_exp = np.repeat(X_base, reps, axis=0)
    y_exp = np.repeat(y_base, reps)
    _, resid_exp, _ = solve_ols(X_exp, y_exp)

    # Hand-built HC1 sandwich: (X'X)^{-1} [X' diag(e^2) X * n/(n-k)] (X'X)^{-1}
    n_exp, k = X_exp.shape
    bread = np.linalg.inv(X_exp.T @ X_exp)
    sq_resid = resid_exp ** 2
    meat = X_exp.T @ (X_exp * sq_resid[:, np.newaxis])
    meat = meat * (n_exp / (n_exp - k))
    oracle_vcov = bread @ meat @ bread

    np.testing.assert_allclose(survey_vcov, oracle_vcov, atol=1e-10)
1501+
def test_survey_rank_deficient_with_psu(self):
    """LinearRegression with a PSU survey design and a duplicated column.

    The fit must complete, report exactly one NaN coefficient, keep all
    residuals finite, give positive finite variances for the remaining
    coefficients, and fill the dropped coefficient's vcov row and column
    with NaN.
    """
    np.random.seed(43)
    n_obs = 50
    # Draw order (regressor, noise, weights) fixes the seeded data set.
    x1 = np.random.randn(n_obs)
    # The third column repeats the second, making X rank deficient.
    X = np.column_stack([np.ones(n_obs), x1, x1])
    y = 2.0 + 1.5 * x1 + np.random.randn(n_obs) * 0.3
    pw = np.random.uniform(0.5, 3.0, size=n_obs)
    # Every observation is its own PSU.
    psu_ids = np.arange(n_obs)

    design = ResolvedSurveyDesign(
        weights=pw,
        weight_type="pweight",
        strata=None,
        psu=psu_ids,
        fpc=None,
        n_strata=0,
        n_psu=n_obs,
        lonely_psu="remove",
    )

    # X already contains the column of ones, so no intercept is added.
    model = LinearRegression(
        survey_design=design,
        include_intercept=False,
        rank_deficient_action="warn",
    )

    # UserWarnings raised during the fit are silenced, not asserted on.
    with warnings.catch_warnings():
        warnings.simplefilter("ignore", UserWarning)
        model.fit(X, y)

    coef = model.coefficients_
    resid = model.residuals_
    vcov = model.vcov_

    is_dropped = np.isnan(coef)
    dropped = np.flatnonzero(is_dropped)
    kept = np.flatnonzero(~is_dropped)

    # Exactly one coefficient is dropped.
    assert np.count_nonzero(is_dropped) == 1

    # Residuals are all finite.
    assert np.isfinite(resid).all()

    # Identified coefficients have positive, finite variances.
    kept_var = vcov[kept, kept]
    assert np.all(np.isfinite(kept_var) & (kept_var > 0))

    # The dropped coefficient's vcov row and column are entirely NaN.
    assert np.isnan(vcov[dropped, :]).all()
    assert np.isnan(vcov[:, dropped]).all()
1553+
def test_survey_rank_deficient_weights_only(self):
    """LinearRegression with a weights-only survey design and a duplicated column.

    The fit must complete, report exactly one NaN coefficient, keep all
    residuals finite, give positive finite variances for the remaining
    coefficients, and fill the dropped coefficient's vcov row and column
    with NaN.
    """
    np.random.seed(44)
    n_obs = 50
    # Draw order (regressor, noise, weights) fixes the seeded data set.
    x1 = np.random.randn(n_obs)
    # The third column repeats the second, making X rank deficient.
    X = np.column_stack([np.ones(n_obs), x1, x1])
    y = 2.0 + 1.5 * x1 + np.random.randn(n_obs) * 0.3
    pw = np.random.uniform(0.5, 3.0, size=n_obs)

    # Design carries probability weights only: no strata, PSU or FPC.
    design = ResolvedSurveyDesign(
        weights=pw,
        weight_type="pweight",
        strata=None,
        psu=None,
        fpc=None,
        n_strata=0,
        n_psu=0,
        lonely_psu="remove",
    )

    # X already contains the column of ones, so no intercept is added.
    model = LinearRegression(
        survey_design=design,
        include_intercept=False,
        rank_deficient_action="warn",
    )

    # UserWarnings raised during the fit are silenced, not asserted on.
    with warnings.catch_warnings():
        warnings.simplefilter("ignore", UserWarning)
        model.fit(X, y)

    coef = model.coefficients_
    resid = model.residuals_
    vcov = model.vcov_

    is_dropped = np.isnan(coef)
    dropped = np.flatnonzero(is_dropped)
    kept = np.flatnonzero(~is_dropped)

    # Exactly one coefficient is dropped.
    assert np.count_nonzero(is_dropped) == 1

    # Residuals are all finite.
    assert np.isfinite(resid).all()

    # Identified coefficients have positive, finite variances.
    kept_var = vcov[kept, kept]
    assert np.all(np.isfinite(kept_var) & (kept_var > 0))

    # The dropped coefficient's vcov row and column are entirely NaN.
    assert np.isnan(vcov[dropped, :]).all()
    assert np.isnan(vcov[:, dropped]).all()
1604+
14651605 def test_linear_regression_weighted_rank_deficient_classical (self ):
14661606 """LinearRegression with weights + classical vcov + rank deficiency."""
14671607 np .random .seed (42 )
0 commit comments