@@ -2872,3 +2872,47 @@ def test_cluster_as_psu_with_na_rejected(self):
28722872 cluster_ids = np .array ([0 , 1 , np .nan , 0 ])
28732873 with pytest .raises (ValueError , match = "Cluster IDs contain missing" ):
28742874 _inject_cluster_as_psu (resolved , cluster_ids )
2875+
2876+
class TestRound16Fixes:
    """Regression tests from PR #218 review round 16.

    Two behaviours are pinned here: cluster IDs injected as PSUs are counted
    per stratum (nesting), and a design declaring FPC together with strata
    but no PSU still resolves.
    """

    def test_injected_cluster_nested_in_strata(self):
        """Cluster labels reused across strata must count as distinct PSUs."""
        from diff_diff.survey import _inject_cluster_as_psu

        n_strata, obs_per_stratum = 2, 4
        # Stratum layout: [0, 0, 0, 0, 1, 1, 1, 1]
        stratum_codes = np.repeat([0, 1], obs_per_stratum)
        # Labels 1 and 2 occur in *both* strata: [1, 1, 2, 2, 1, 1, 2, 2]
        repeated_labels = np.tile([1, 1, 2, 2], n_strata)

        # Design starts without any PSU information; clusters fill that role.
        design_without_psu = ResolvedSurveyDesign(
            weights=np.ones(n_strata * obs_per_stratum),
            weight_type="pweight",
            strata=stratum_codes,
            psu=None,
            fpc=None,
            n_strata=n_strata,
            n_psu=0,
            lonely_psu="remove",
        )

        injected = _inject_cluster_as_psu(design_without_psu, repeated_labels)

        # Two clusters in each of two strata -> 4 PSUs, not 2.
        assert injected.n_psu == 4
        # Survey degrees of freedom: n_psu - n_strata = 4 - 2 = 2.
        assert injected.df_survey == 2

    def test_fpc_with_strata_no_psu_accepted(self):
        """An FPC column plus strata, with no PSU declared, resolves cleanly."""
        # Six unit-weight observations split evenly over two strata, each
        # stratum carrying its own population size in the FPC column.
        columns = {
            "y": [1.0, 2.0, 3.0, 4.0, 5.0, 6.0],
            "w": [1.0] * 6,
            "strat": [0] * 3 + [1] * 3,
            "pop": [10.0] * 3 + [20.0] * 3,
        }
        frame = pd.DataFrame(columns)

        design = SurveyDesign(
            weights="w",
            weight_type="pweight",
            strata="strat",
            fpc="pop",
        )

        # resolve() must not raise: clusters may still be injected as PSUs
        # afterwards, so the FPC check is expected to be deferred here.
        outcome = design.resolve(frame)
        assert outcome.fpc is not None
0 commit comments