# r - 在 R 中，在 {boot} 中使用 boot() 函数对聚集数据最高级别的非参数化

``````
### create a very simple two-level dataset with 'subject' as clustering variable

rho <- 0.4

dat <- expand.grid(

trial=factor(1:5),

subject=factor(1:3)

)

sig <- rho * tcrossprod(model.matrix(~ 0 + subject, dat))

diag(sig) <- 1

set.seed(17); dat\$value <- chol(sig) %*% rnorm(15, 0, 1)

### my statistic function (adapted from here: http://biostat.mc.vanderbilt.edu/wiki/Main/HowToBootstrapCorrelatedData)

resamp.mean <- function(data, i){

cluster <- c('subject', 'trial')

# sample the clustering factor

cls <- unique(data[[cluster[1]]])[i]

# subset on the sampled clustering factors

sub <- lapply(cls, function(b) subset(data, data[[cluster[1]]]==b))

sub.2 <- do.call(rbind, sub) # join and return samples

mean((sub.2\$value)) # calculate the statistic

}

debugonce(boot)

set.seed(17); dat.boot <- boot(data = dat, statistic = resamp.mean, 4)

### stepping trough the debugger until object 'i' was assigned

### investigating 'i'

[,1] [,2] [,3] [,4] [,5] [,6] [,7] [,8] [,9] [,10] [,11] [,12] [,13] [,14] [,15]

[1,] 3 7 12 13 10 14 14 15 12 12 12 4 5 9 10

[2,] 15 9 3 13 4 10 2 4 6 11 10 4 9 4 3

[3,] 8 4 7 15 10 12 9 8 9 12 4 15 14 10 4

[4,] 12 3 1 15 8 13 9 1 4 13 9 13 2 11 2

### which is not what I was hoping for.

### I would like something that looks like this, supposing indices = c(2, 2, 1) for the first resample:

[,1] [,2] [,3] [,4] [,5] [,6] [,7] [,8] [,9] [,10] [,11] [,12] [,13] [,14] [,15]

[1,] 6 7 8 9 10 6 7 8 9 10 1 2 3 4 5

``````

``````
resamp.mean <- function(dat,

indices,

cluster = c('subject', 'trial'),

replace = TRUE){

# boot expects an indices argument but the sampling happens

# via sample() as in the original source of the function

# sample the clustering factor

cls <- sample(unique(dat[[cluster[1]]]), replace=replace)

# subset on the sampled clustering factors

sub <- lapply(cls, function(b) subset(dat, dat[[cluster[1]]]==b))

# join and return samples

sub <- do.call(rbind, sub)

# UNCOMMENT HERE TO SEE SAMPLED SUBJECTS

# print(sub)

mean(sub\$value)

}

``````

``````
trial subject value

1 1 1 -1.1581291

2 2 1 -0.1458287

3 3 1 -0.2134525

4 4 1 -0.5796521

5 5 1 0.6501587

11 1 3 2.6678441

12 2 3 1.3945740

13 3 3 1.4849435

14 4 3 0.4086737

15 5 3 1.3399146

111 1 1 -1.1581291

121 2 1 -0.1458287

131 3 1 -0.2134525

141 4 1 -0.5796521

151 5 1 0.6501587

``````