Skip to content

Commit 5926a0c

Browse files
author
acp29
committedJan 18, 2022
updated bootnhst to compute standard errors using bootknife resampling
1 parent b55eb7c commit 5926a0c

File tree

2 files changed

+65
-32
lines changed

2 files changed

+65
-32
lines changed
 

‎inst/bootnhst.m

+37-18
Original file line numberDiff line numberDiff line change
@@ -56,11 +56,11 @@
5656
% from the bootstrap samples. This can be a function handle or string
5757
% corresponding to the function name. If empty, the default is @mean or
5858
% 'mean'. If DATA is multivariate, bootfun is the grand mean, which is
59-
% is the mean of the means of each column (i.e. variates). Since bootnhst
60-
% calculates sampling variance using Tukey's jacknife, bootfun must be a
61-
% smooth function of the DATA. If a robust statistic like the median is
62-
% required, use 'robust', which uses a smoothed version of the median
63-
% for univariate or multivariate DATA (see function help for smoothmedian).
59+
% is the mean of the means of each column (i.e. variates). The standard
60+
% errors are estimated from 200 bootknife resamples [2]. If a robust
61+
% statistic for central location is required, setting bootfun to 'robust'
62+
% implements a smoothed version of the median (see function help for
63+
% smoothmedian).
6464
%
6565
% p = bootnhst(DATA,GROUP,ref,bootfun,nboot) sets the number of bootstrap
6666
% resamples. Increasing nboot reduces the monte carlo error of the p-value
@@ -106,8 +106,8 @@
106106
% standard error of the difference (derived from the pooled, weighted
107107
% mean, sampling variance). To compare the q-ratio reported here with
108108
% Tukey's more traditional q-statistic, multiply it by sqrt(2). Note
109-
% that because the sampling variance is estimated using Tukey's
110-
% jackknife, bootnhst can be used to compare a wide variety of
109+
% that because unbiased sampling variance is estimated using bootknife
110+
% resampling [2], bootnhst can be used to compare a wide variety of
111111
% statistics (not just the mean).
112112
%
113113
% The columns of output argument c contain:
@@ -153,6 +153,9 @@
153153
% Bibliography:
154154
% [1] Efron and Tibshirani. Chapter 16 Hypothesis testing with the
155155
% bootstrap in An introduction to the bootstrap (CRC Press, 1994)
156+
% [2] Hesterberg, Tim C. (2004), Unbiasing the Bootstrap - Bootknife-
157+
% Sampling vs. Smoothing, Proceedings of the Section on Statistics
158+
% and the Environment, American Statistical Association, 2924-2930.
156159
%
157160
% bootnhst v1.3.1.0 (17/01/2022)
158161
% Author: Andrew Charles Penn
@@ -208,7 +211,7 @@
208211
elseif all(bootfun(data) == smoothmedian(data))
209212
if nvar > 1
210213
% Grand smoothed median for multivariate data
211-
bootfun = @(data) smoothmedian(smoothmedian(data))
214+
bootfun = @(data) smoothmedian(smoothmedian(data));
212215
else
213216
bootfun = @smoothmedian;
214217
end
@@ -221,17 +224,17 @@
221224
% Grand mean for multivariate data
222225
bootfun = @(data) mean(mean(data));
223226
else
224-
bootfun = 'mean';
227+
bootfun = @mean;
225228
end
226229
elseif any(strcmpi(bootfun,{'robust','smoothmedian'}))
227230
if nvar > 1
228231
% Grand smoothed median for multivariate data
229-
bootfun = @(data) smoothmedian(smoothmedian(data))
232+
bootfun = @(data) smoothmedian(smoothmedian(data));
230233
else
231-
bootfun = 'smoothmedian';
234+
bootfun = @smoothmedian;
232235
end
233236
elseif strcmpi(bootfun,'median')
234-
error('bootfun cannot be the median, use ''robust'' instead.')
237+
%error('bootfun cannot be the median, use ''robust'' instead.')
235238
end
236239
end
237240
if (nargin < 5) || isempty(nboot)
@@ -291,24 +294,40 @@
291294
end
292295

293296
% Define function to calculate maximum difference among groups
294-
func = @(data) maxq(data,g,ref,bootfun,nvar,excl);
297+
func = @(data) maxq(data,g,ref,bootfun,excl);
295298

296299
% Perform resampling and calculate bootstrap statistics
297300
state = warning;
298301
warning off; % silence warnings about non-vectorized bootfun
299302
Q = bootstrp (nboot,func,data,'Options',paropt);
300303
warning(state);
301304

302-
% Calculate pooled (weighted mean) sampling variance using Tukey's jackknife
305+
% Compute the estimate (theta) and it's pooled (weighted mean) sampling variance
303306
theta = zeros(k,1);
304307
SE = zeros(k,1);
305308
Var = zeros(k,1);
309+
B = 200;
310+
t = zeros(B,1);
306311
nk = zeros(size(gk));
307312
for j = 1:k
308313
nk(j) = sum(g==gk(j));
309-
theta(j,:) = feval(bootfun,data(g==gk(j),:));
310-
SE(j,1) = jack(data(g==gk(j),:), bootfun);
311-
Var(j,1) = ((nk(j)-1)/(N-k)) * SE(j).^2;
314+
theta(j) = feval(bootfun,data(g==gk(j),:));
315+
% Compute unbiased estimate of the standard error by bootknife resampling
316+
if nvar > 1
317+
t = zeros(B,1);
318+
for b = 1:B
319+
idx = 1+fix(rand(nk(j)-1,1)*nk(j));
320+
tmp = data(g==gk(j),:);
321+
t(b) = feval(bootfun,tmp(idx,:));
322+
end
323+
else
324+
% Vectorized if data is univariate
325+
idx = 1+fix(rand(nk(j)-1,B)*nk(j));
326+
tmp = data(g==gk(j),:);
327+
t = feval(bootfun,tmp(idx));
328+
end
329+
SE(j) = std(t);
330+
Var(j) = ((nk(j)-1)/(N-k)) * SE(j)^2;
312331
end
313332
nk_bar = sum(nk.^2)./sum(nk); % weighted mean sample size
314333
Var = sum(Var.*nk/nk_bar); % pooled sampling variance weighted by sample size
@@ -370,7 +389,7 @@
370389
end
371390

372391
% Calculate overall p-value
373-
q = func(data);
392+
q = max(c(:,6));
374393
p = sum(Q>=q)/nboot;
375394

376395
% Prepare stats output structure

‎inst/helper/maxq.m

+28-14
Original file line numberDiff line numberDiff line change
@@ -1,32 +1,46 @@
1-
function q = maxq (Y,g,ref,bootfun,nvar,excl)
1+
function q = maxq (Y,g,ref,bootfun,excl)
22

33
% Helper function file required for bootnhst
44

55
% Calculate maximum studentized difference between bootfun output of all the groups
66

77
% Get size and of the data vector or matrix
8-
[m,n] = size(Y);
8+
[m,nvar] = size(Y);
99
N = size(g,1);
10-
if (nvar > 1)
11-
n = 1;
12-
end
1310

1411
% Calculate the number (k) of unique groups
1512
gk = unique(g);
1613
k = numel(gk);
1714

18-
% Calculate bootfun on the data from each group
19-
theta = zeros(k,n);
15+
% Compute the estimate (theta) and it's pooled (weighted mean) sampling variance
16+
theta = zeros(k,1);
17+
SE = zeros(k,1);
2018
Var = zeros(k,1);
19+
B = 200;
20+
t = zeros(B,1);
2121
nk = zeros(size(gk));
2222
for j = 1:k
2323
nk(j) = sum(g==gk(j));
24-
theta(j,:) = feval(bootfun,Y(g==gk(j),:));
25-
SE = jack(Y(g==gk(j),:), bootfun);
26-
Var(j,1) = ((nk(j)-1)/(N-k)) * SE.^2;
24+
theta(j) = feval(bootfun,Y(g==gk(j),:));
25+
% Compute unbiased estimate of the standard error by bootknife resampling
26+
if nvar > 1
27+
t = zeros(B,1);
28+
for b = 1:B
29+
idx = 1+fix(rand(nk(j)-1,1)*nk(j));
30+
tmp = Y(g==gk(j),:);
31+
t(b) = feval(bootfun,tmp(idx,:));
32+
end
33+
else
34+
% Vectorized if data is univariate
35+
idx = 1+fix(rand(nk(j)-1,B)*nk(j));
36+
tmp = Y(g==gk(j),:);
37+
t = feval(bootfun,tmp(idx));
38+
end
39+
SE(j) = std(t);
40+
Var(j) = ((nk(j)-1)/(N-k)) * SE(j)^2;
2741
end
28-
nk_bar = sum(nk.^2)./sum(nk); % mean weighted sample size
29-
Var = sum(Var.*nk/nk_bar); % pooled sampling variance weighted by sample size
42+
nk_bar = sum(nk.^2)./sum(nk); % weighted mean sample size
43+
Var = sum(Var.*nk/nk_bar); % pooled sampling variance weighted by sample size
3044

3145
% Calculate weights to correct for unequal sample size
3246
% when calculating standard error of the difference
@@ -55,13 +69,13 @@
5569
% [3] www.graphpad.com/guides/prism/latest/statistics/stat_the_methods_of_tukey_and_dunne.htm
5670
%
5771
[theta,i] = sort(theta(1:end-l),1);
58-
range = abs(theta(k-l,:) - theta(1,:));
72+
range = abs(theta(k-l) - theta(1));
5973
q = range / sqrt(Var * (w(i(k-l)) + w(i(1))));
6074
else
6175
% Calculate Dunnett's q-ratio for maximum difference between
6276
% bootfun for test vs. control samples
6377
% Dunnett's q-ratio is similar to Student's t-statistic
64-
[range, i] = max(abs((theta - ones(k,1) * theta(ref,:))));
78+
[range, i] = max(abs((theta - ones(k,1) * theta(ref))));
6579
q = range / sqrt(Var * (w(ref) + w(i)));
6680
end
6781

0 commit comments

Comments
 (0)
Please sign in to comment.