|
56 | 56 | % from the bootstrap samples. This can be a function handle or string
|
57 | 57 | % corresponding to the function name. If empty, the default is @mean or
|
58 | 58 | % 'mean'. If DATA is multivariate, bootfun is the grand mean, which is
|
59 |
| -% is the mean of the means of each column (i.e. variates). Since bootnhst |
60 |
| -% calculates sampling variance using Tukey's jacknife, bootfun must be a |
61 |
| -% smooth function of the DATA. If a robust statistic like the median is |
62 |
| -% required, use 'robust', which uses a smoothed version of the median |
63 |
| -% for univariate or multivariate DATA (see function help for smoothmedian). |
| 59 | +% the mean of the means of each column (i.e. variates). The standard |
| 60 | +% errors are estimated from 200 bootknife resamples [2]. If a robust |
| 61 | +% statistic for central location is required, setting bootfun to 'robust' |
| 62 | +% implements a smoothed version of the median (see function help for |
| 63 | +% smoothmedian). |
64 | 64 | %
|
65 | 65 | % p = bootnhst(DATA,GROUP,ref,bootfun,nboot) sets the number of bootstrap
|
66 | 66 | % resamples. Increasing nboot reduces the Monte Carlo error of the p-value
|
|
106 | 106 | % standard error of the difference (derived from the pooled, weighted
|
107 | 107 | % mean, sampling variance). To compare the q-ratio reported here with
|
108 | 108 | % Tukey's more traditional q-statistic, multiply it by sqrt(2). Note
|
109 |
| -% that because the sampling variance is estimated using Tukey's |
110 |
| -% jackknife, bootnhst can be used to compare a wide variety of |
| 109 | +% that because unbiased sampling variance is estimated using bootknife |
| 110 | +% resampling [2], bootnhst can be used to compare a wide variety of |
111 | 111 | % statistics (not just the mean).
|
112 | 112 | %
|
113 | 113 | % The columns of output argument c contain:
|
|
153 | 153 | % Bibliography:
|
154 | 154 | % [1] Efron and Tibshirani. Chapter 16 Hypothesis testing with the
|
155 | 155 | % bootstrap in An introduction to the bootstrap (CRC Press, 1994)
|
| 156 | +% [2] Hesterberg, Tim C. (2004), Unbiasing the Bootstrap - Bootknife |
| 157 | +% Sampling vs. Smoothing, Proceedings of the Section on Statistics |
| 158 | +% and the Environment, American Statistical Association, 2924-2930. |
156 | 159 | %
|
157 | 160 | % bootnhst v1.3.1.0 (17/01/2022)
|
158 | 161 | % Author: Andrew Charles Penn
|
|
208 | 211 | elseif all(bootfun(data) == smoothmedian(data))
|
209 | 212 | if nvar > 1
|
210 | 213 | % Grand smoothed median for multivariate data
|
211 |
| - bootfun = @(data) smoothmedian(smoothmedian(data)) |
| 214 | + bootfun = @(data) smoothmedian(smoothmedian(data)); |
212 | 215 | else
|
213 | 216 | bootfun = @smoothmedian;
|
214 | 217 | end
|
|
221 | 224 | % Grand mean for multivariate data
|
222 | 225 | bootfun = @(data) mean(mean(data));
|
223 | 226 | else
|
224 |
| - bootfun = 'mean'; |
| 227 | + bootfun = @mean; |
225 | 228 | end
|
226 | 229 | elseif any(strcmpi(bootfun,{'robust','smoothmedian'}))
|
227 | 230 | if nvar > 1
|
228 | 231 | % Grand smoothed median for multivariate data
|
229 |
| - bootfun = @(data) smoothmedian(smoothmedian(data)) |
| 232 | + bootfun = @(data) smoothmedian(smoothmedian(data)); |
230 | 233 | else
|
231 |
| - bootfun = 'smoothmedian'; |
| 234 | + bootfun = @smoothmedian; |
232 | 235 | end
|
233 | 236 | elseif strcmpi(bootfun,'median')
|
234 |
| - error('bootfun cannot be the median, use ''robust'' instead.') |
| 237 | + %error('bootfun cannot be the median, use ''robust'' instead.') |
235 | 238 | end
|
236 | 239 | end
|
237 | 240 | if (nargin < 5) || isempty(nboot)
|
|
291 | 294 | end
|
292 | 295 |
|
293 | 296 | % Define function to calculate maximum difference among groups
|
294 |
| - func = @(data) maxq(data,g,ref,bootfun,nvar,excl); |
| 297 | + func = @(data) maxq(data,g,ref,bootfun,excl); |
295 | 298 |
|
296 | 299 | % Perform resampling and calculate bootstrap statistics
|
297 | 300 | state = warning;
|
298 | 301 | warning off; % silence warnings about non-vectorized bootfun
|
299 | 302 | Q = bootstrp (nboot,func,data,'Options',paropt);
|
300 | 303 | warning(state);
|
301 | 304 |
|
302 |
| - % Calculate pooled (weighted mean) sampling variance using Tukey's jackknife |
| 305 | + % Compute the estimate (theta) and its pooled (weighted mean) sampling variance |
303 | 306 | theta = zeros(k,1);
|
304 | 307 | SE = zeros(k,1);
|
305 | 308 | Var = zeros(k,1);
|
| 309 | + B = 200; |
| 310 | + t = zeros(B,1); |
306 | 311 | nk = zeros(size(gk));
|
307 | 312 | for j = 1:k
|
308 | 313 | nk(j) = sum(g==gk(j));
|
309 |
| - theta(j,:) = feval(bootfun,data(g==gk(j),:)); |
310 |
| - SE(j,1) = jack(data(g==gk(j),:), bootfun); |
311 |
| - Var(j,1) = ((nk(j)-1)/(N-k)) * SE(j).^2; |
| 314 | + theta(j) = feval(bootfun,data(g==gk(j),:)); |
| 315 | + % Compute unbiased estimate of the standard error by bootknife resampling |
| 316 | + if nvar > 1 |
| 317 | + t = zeros(B,1); |
| 318 | + for b = 1:B |
| 319 | + idx = 1+fix(rand(nk(j)-1,1)*nk(j)); |
| 320 | + tmp = data(g==gk(j),:); |
| 321 | + t(b) = feval(bootfun,tmp(idx,:)); |
| 322 | + end |
| 323 | + else |
| 324 | + % Vectorized if data is univariate |
| 325 | + idx = 1+fix(rand(nk(j)-1,B)*nk(j)); |
| 326 | + tmp = data(g==gk(j),:); |
| 327 | + t = feval(bootfun,tmp(idx)); |
| 328 | + end |
| 329 | + SE(j) = std(t); |
| 330 | + Var(j) = ((nk(j)-1)/(N-k)) * SE(j)^2; |
312 | 331 | end
|
313 | 332 | nk_bar = sum(nk.^2)./sum(nk); % weighted mean sample size
|
314 | 333 | Var = sum(Var.*nk/nk_bar); % pooled sampling variance weighted by sample size
|
|
370 | 389 | end
|
371 | 390 |
|
372 | 391 | % Calculate overall p-value
|
373 |
| - q = func(data); |
| 392 | + q = max(c(:,6)); |
374 | 393 | p = sum(Q>=q)/nboot;
|
375 | 394 |
|
376 | 395 | % Prepare stats output structure
|
|
0 commit comments