In this post, I will cover a general, randomization-based methodology for comparing models without assuming an a priori known distribution for the test statistic. A regular likelihood ratio test (LRT) comparing nested models assumes infinite data (asymptotics) and that the likelihood ratio statistic follows a χ² distribution with degrees of freedom equal to the difference in the number of parameters. Other common situations where the χ² distribution does not apply:
Testing variance components, for example comparing a model with a random effect on a parameter versus a model without it. Because the variance is on the boundary of the parameter space under the null, the literature shows that the reference distribution is a mixture of χ² distributions with 0 and 1 degrees of freedom. Simulation can help us figure out the mixture weights, and for simple linear mixed effects models analytical solutions exist.
Comparing models that are not nested, such as models using different functional forms for the effect of a covariate on a parameter (for example, power versus linear), or comparing a model with a covariate on the slope versus a covariate on the intercept, as shown below.
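To make the boundary case concrete, here is a minimal sketch of the classical 50:50 χ²₀:χ²₁ mixture p-value for testing a single variance component; the observed statistic `lrt_obs` is a hypothetical value for illustration, not from this post's data:

```r
# p-value under a 50:50 mixture of chi-square(0) and chi-square(1),
# the classical null distribution when testing one variance component
# that sits on the boundary of the parameter space
lrt_obs <- 3.2  # hypothetical observed LRT statistic

p_naive   <- pchisq(lrt_obs, df = 1, lower.tail = FALSE)        # ignores the boundary
p_mixture <- 0.5 * pchisq(lrt_obs, df = 1, lower.tail = FALSE)  # chi-square(0) is a point mass at 0
```

The naive χ²₁ p-value is twice the mixture p-value, so ignoring the boundary makes the test conservative.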
First, we will simulate a small dataset of eight individuals with a random slope and intercept model, to mimic a situation where the asymptotic assumptions might not hold:
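The original simulation code is not shown here; the following is a minimal sketch consistent with the setup described (eight subjects, two with `cov = 1` and six with `cov = 2`, random intercepts and slopes). The object and column names (`modeldata`, `ID`, `time`, `cov`, `contresponse`) match those used in the permutation code later; the effect sizes and variances are assumptions for illustration:

```r
library(dplyr)
set.seed(123)

# hypothetical simulation sketch: 8 subjects observed at 7 time points,
# with covariate-dependent fixed effects plus random intercept and slope
modeldata <- tibble(
  ID   = factor(rep(1:8, each = 7)),
  time = rep(0:6, times = 8),
  cov  = rep(c(1, 1, 2, 2, 2, 2, 2, 2), each = 7)
) %>%
  group_by(ID) %>%
  mutate(
    contresponse = (10 + 2 * (cov == 2) + rnorm(1, 0, 2)) +      # random intercept, shifted by cov
                   (1 + 0.5 * (cov == 2) + rnorm(1, 0, 0.3)) * time +  # random slope, shifted by cov
                   rnorm(n(), 0, 1)                               # residual error
  ) %>%
  ungroup()
```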
The plot above shows the response versus time split by covariate. We can see that the intercept and slope differ by covariate value and that we have a total of eight subjects with unbalance, i.e. two IDs having a covariate value of 1 and six IDs having a value of 2. Next, we fit a series of linear mixed effects models.
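The model-fitting code is not shown above; a sketch of the two competing fits, with formulas inferred from the permutation code below (covariate on the intercept in `model1`, covariate on the slope in `model2`), would look like:

```r
library(lme4)

# covariate on intercept (model1) versus covariate on slope (model2);
# both have uncorrelated random intercept and random slope on time
model1 <- lmer(contresponse ~ time + cov + (1 | ID) + (0 + time | ID),
               data = modeldata, REML = FALSE)
model2 <- lmer(contresponse ~ time + time:cov + (1 | ID) + (0 + time | ID),
               data = modeldata, REML = FALSE)

anova(model2, model1)  # same number of parameters: no p-value from the standard LRT

# observed statistic used by the permutation test below
lrt_stat <- as.numeric(2 * (logLik(model2) - logLik(model1)))
```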
When comparing model2 and model1, the standard LRT gives no p-value because the two models have the same number of parameters (the difference in degrees of freedom is zero). The AIC is lower for model2 by 14.6 points, suggesting it is better than model1.
Next, we will use a randomization/permutation procedure to build the null distribution of the likelihood difference and compute a p-value.
Code

```r
n_permutations <- 1000  # number of permutations
lrt_null_distribution <- pbapply::pbreplicate(n_permutations, {
  # permute the covariate values across individuals, keeping the 2/6 split
  permcov <- modeldata %>%
    distinct(ID, cov) %>%
    mutate(cov_permuted2 = sample(cov, replace = FALSE))
  modeldataperm <- left_join(modeldata, permcov)
  model1_perm <- lmer(contresponse ~ time + cov_permuted2 + (1 | ID) + (0 + time | ID),
                      data = modeldataperm, REML = FALSE)
  model2_perm <- lmer(contresponse ~ time + time:cov_permuted2 + (1 | ID) + (0 + time | ID),
                      data = modeldataperm, REML = FALSE)
  as.numeric(2 * (logLik(model2_perm) - logLik(model1_perm)))
})

# lrt_stat is the observed statistic: 2 * (logLik(model2) - logLik(model1))
p_value_permutation <- mean(lrt_null_distribution >= lrt_stat)

ggplot(data.frame(lrt = lrt_null_distribution,
                  perm = 1:length(lrt_null_distribution)), aes(lrt)) +
  geom_step(stat = "ecdf", aes(col = "b.1000 permutations")) +
  geom_step(data = data.frame(lrt = rchisq(n = 10000, df = 1)),
            stat = "ecdf", aes(col = "a.theoretical")) +
  geom_hline(yintercept = 0.95) +
  annotate(geom = "text", x = -1, y = 0.90, label = p_value_permutation) +
  ggthemes::scale_color_tableau() +
  theme_bw(base_size = 16) +
  theme(legend.position = "top") +
  labs(col = "", x = "Likelihood Difference",
       y = "Empirical\nCumulative Distribution",
       caption = "a horizontal line is drawn at 95% p-value: percentage of LRT values ≥ 14.64448") +
  scale_y_continuous(breaks = seq(0, 1, 0.2), labels = scales::percent_format())
```
The randomization test shows that the model with the covariate on the slope is better than the model with the covariate on the intercept, with a p-value of 0.03, consistent with the 14.65-point difference in -2LL in the original data. The procedure above randomly permuted the covariate values across the individuals 1000 times. But wait a minute: we only have 8 subjects, with two having covariate = 1 and six having covariate = 2, so how many possible permutations are there in total? We can compute that there are choose(8, 2) = 28 unique permutations, so we could have run all 28 models to obtain the full distribution of the LRT. This is what we do next!
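The exhaustive enumeration could be sketched as follows: list all choose(8, 2) = 28 ways of assigning the two "cov = 1" labels to the eight subjects and refit the two models under each. The object names (`lrt_null_distribution_all`, `p_value_permutation_all`) match those used in the plotting code below; the helper `assignments` is introduced here for illustration:

```r
# all 28 unique assignments of the two cov = 1 labels to the 8 subjects
ids <- modeldata %>% distinct(ID) %>% pull(ID)
assignments <- combn(ids, 2, simplify = FALSE)

lrt_null_distribution_all <- sapply(assignments, function(pair) {
  permcov <- modeldata %>%
    distinct(ID) %>%
    mutate(cov_permuted2 = if_else(ID %in% pair, 1, 2))
  modeldataperm <- left_join(modeldata, permcov, by = "ID")
  m1 <- lmer(contresponse ~ time + cov_permuted2 + (1 | ID) + (0 + time | ID),
             data = modeldataperm, REML = FALSE)
  m2 <- lmer(contresponse ~ time + time:cov_permuted2 + (1 | ID) + (0 + time | ID),
             data = modeldataperm, REML = FALSE)
  as.numeric(2 * (logLik(m2) - logLik(m1)))
})

p_value_permutation_all <- mean(lrt_null_distribution_all >= lrt_stat)
```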
```r
ggplot(data.frame(lrt = lrt_null_distribution,
                  perm = 1:length(lrt_null_distribution)), aes(lrt)) +
  geom_step(stat = "ecdf", aes(col = "b.1000 permutations")) +
  geom_step(data = data.frame(lrt = lrt_null_distribution_all),
            stat = "ecdf", aes(col = "c.all permutations")) +
  geom_step(data = data.frame(lrt = rchisq(n = 10000, df = 1)),
            stat = "ecdf", aes(col = "a.theoretical")) +
  geom_hline(yintercept = 0.95) +
  annotate(geom = "text", x = -1, y = 0.90, label = round(p_value_permutation_all, 3)) +
  ggthemes::scale_color_tableau() +
  theme_bw(base_size = 16) +
  theme(legend.position = "top") +
  labs(col = "", x = "Likelihood Difference",
       y = "Empirical\nCumulative Distribution",
       caption = "a horizontal line is drawn at 95% p-value: percentage of LRT values ≥ 14.64448") +
  scale_y_continuous(breaks = seq(0, 1, 0.2), labels = scales::percent_format())
```
The plot shows good agreement between the 1000 random permutations and the all-permutations (N = 28) approach. Ideally, one should not waste CPU time when the full distribution can be enumerated. In practice, however, the number of possible permutations quickly runs into the millions and billions, so we resort to a random subset to approximate it.
Now it is your turn: use the code provided in the post to compute a p-value for removing the random effect on the slope!