Visualizing Dose Exposure Response – smouksassi.github.io

In this post I will provide the code and the thinking process behind the figure shared in my first blog post. We will start with a simpler version: the top panel shows the endpoint versus exposure, which in this case is the area under the drug concentration curve (AUC), and the bottom panel shows the individual AUCs by dose level, with a different symbol by responder status: responder (triangles) and non-responder (circles).

Code

library(ggplot2)
library(tidyr)
library(dplyr)
library(ggquickeda)
library(patchwork)
library(ggridges)
library(ggrepel)

# Read the example data and derive the variables used by the plots below.
ICGI <- read.csv("ICGI.csv")

# Text version of the binary endpoint (1 = "responder"), mapped to point shape.
ICGI[["responder"]] <- ifelse(
  ICGI[["ICGI"]] == 1,
  "responder",
  "not responder"
)

# Dose as an ordered set of display labels, Placebo first.
ICGI[["DOSE"]] <- factor(
  as.character(ICGI[["DOSE"]]),
  levels = c("0", "600", "1200", "1800", "2400"),
  labels = c("Placebo", "600 mg", "1200 mg", "1800 mg", "2400 mg")
)
# Top panel: observed 0/1 outcomes against AUC, overlaid with a logistic
# regression fit (binomial glm) and its standard-error band.
p1 <- ggplot(data = ICGI, mapping = aes(x = AUC, y = ICGI)) +
  geom_point(
    mapping = aes(shape = responder),
    # jitter spreads the points that would otherwise pile up at 0 and 1
    position = position_jitter(height = 0.08),
    alpha = 0.2,
    size = 3
  ) +
  geom_smooth(
    method = "glm",
    method.args = list(family = "binomial"),
    se = TRUE
  ) +
  labs(
    x = "AUC (µg*h/mL)",
    y = "Probability of Endpoint",
    shape = ""
  ) +
  guides(shape = guide_legend(reverse = TRUE)) +
  theme_bw(base_size = 16)

# Bottom panel: individual AUC values by dose level, split by responder status.
#
# The shapes are chosen to agree with the top panel, where `shape = responder`
# goes through ggplot2's default discrete shape scale: the first level
# ("not responder") is drawn as a circle and the second ("responder") as a
# triangle. Responders are nudged below the dose line and non-responders above.
# `%in%` is used for subsetting so that a missing responder status drops the
# row instead of producing an all-NA row.
p2 <-
  ggplot(ICGI, aes(AUC, DOSE)) +
  geom_point(
    data = ICGI[ICGI$responder %in% "responder", ],
    position = position_nudge(y = -0.25),
    size = 3,
    alpha = 0.2,
    shape = "triangle"
  ) +
  geom_point(
    data = ICGI[ICGI$responder %in% "not responder", ],
    position = position_nudge(y = 0.25),
    size = 3,
    alpha = 0.2,
    shape = "circle"
  ) +
  theme_bw(base_size = 16) +
  labs(y = "Dose")
# Stack the two panels (patchwork `/`): the top panel gets percent labels on
# the y axis, an in-panel legend and no x axis text/title; the bottom panel
# carries the AUC axis.
(
  p1 +
    scale_y_continuous(
      breaks = seq(0, 1, 0.2),
      labels = scales::percent_format()
    ) +
    theme(
      legend.position = "inside",
      legend.position.inside = c(0.8, 0.55),
      legend.background = element_rect(
        fill = "transparent",
        colour = "black"
      ),
      legend.title = element_blank(),
      axis.text.x.bottom = element_blank(),
      axis.title.x.bottom = element_blank()
    )
) / (
  p2 +
    labs(x = "AUC (µg*h/mL)", shape = "") +
    scale_x_continuous(breaks = seq(0, 400, 50))
)

With this visual representation several questions remain without an answer:

What is the probability of response by dose level?
What is the probability of response by tertiles or quartiles of exposure? And what about the Placebo (exposure = 0) response?

We will answer these questions incrementally. First, we reshape the data into long format to accommodate the possibility of multiple endpoints and multiple exposure metrics. Second, we compute the quartiles of exposure by exposure metric and endpoint, keeping the Placebo separate. Then we add the variable exptile, which assigns to each exposure one of the following values: “Placebo”, “Q1”, “Q2”, “Q3”, “Q4”. We also compute the exposure limits that can be used in the graph to show the limits of each quartile.

Code

# Exposure value that identifies Placebo records; these are kept out of the
# quartile computation and get their own "Placebo" category.
exposure_metric_plac_value <- 0

# Reshape to long format (one row per subject x endpoint x exposure metric),
# then compute the exposure quartile cut points within each endpoint/metric
# and assign every record to its exptile.
# pivot_longer() replaces the superseded gather(); more endpoints or exposure
# metrics can be added to the respective `cols`.
ICGIlong <- ICGI %>%
  pivot_longer(
    cols = all_of(c("ICGI")),
    names_to = "Endpoint",
    values_to = "response"
  ) %>%
  pivot_longer(
    cols = all_of(c("AUC", "CMAX")),
    names_to = "expname",
    values_to = "expvalue"
  ) %>%
  group_by(Endpoint, expname) %>%
  mutate(
    Q25 = quantile(
      expvalue[!expvalue %in% c(exposure_metric_plac_value)],
      0.25, na.rm = TRUE
    ),
    Q50 = quantile(
      expvalue[!expvalue %in% c(exposure_metric_plac_value)],
      0.50, na.rm = TRUE
    ),
    Q75 = quantile(
      expvalue[!expvalue %in% c(exposure_metric_plac_value)],
      0.75, na.rm = TRUE
    ),
    exptile = case_when(
      expvalue == exposure_metric_plac_value ~ "Placebo",
      expvalue > exposure_metric_plac_value & expvalue <= Q25 ~ "Q1",
      expvalue > Q25 & expvalue <= Q50 ~ "Q2",
      expvalue > Q50 & expvalue <= Q75 ~ "Q3",
      expvalue > Q75 ~ "Q4"
    )
  ) %>%
  # do not leave the result grouped: later steps set their own grouping
  ungroup()

# Minimum, quartile cut points and maximum of the non-Placebo exposures, one
# row per limit, used to draw the bin limits on the plot.
exposurelimits <- ICGIlong %>%
  group_by(expname, Endpoint) %>%
  reframe(
    intercept = quantile(
      expvalue[!expvalue %in% c(exposure_metric_plac_value)],
      c(0, 0.25, 0.5, 0.75, 1),
      na.rm = TRUE
    ),
    quant = c(0, 0.25, 0.5, 0.75, 1)
  )

We prepare the distributions plot beneath the probability of response curve by using ggridges, constructing a numerical value for where to plot the distributions, and excluding the Placebo from the distributions plot. The distributions are shown as densities scaled to a maximum of 1, with quantile lines at the 10th, 25th, 50th, 75th and 90th percentiles.

Code

# these values will become an argument in the ER function
dist_position_scaler <- 0.2
dist_offset <- 0
dist_scale <- 0.9

# y position of each dose's exposure distribution: the dose levels are
# reversed and converted to their integer codes, so the last dose level sits
# at -dist_position_scaler and each earlier level one step further down.
ICGIlong$keynumeric <- -dist_position_scaler *
  as.numeric(forcats::fct_rev(as.factor(ICGIlong[["DOSE"]]))) + dist_offset

# Exposure-response plot: raw outcomes, quartile limits, logistic fit, and the
# per-dose exposure distributions (Placebo excluded) drawn below zero.
p1 <- ggplot(ICGIlong, aes(x = expvalue, y = response)) +
  geom_point(aes(colour = DOSE)) +
  geom_hline(yintercept = c(0, 1), colour = "darkgray") +
  geom_vline(
    data = exposurelimits,
    mapping = aes(xintercept = intercept),
    colour = "lightgray",
    linetype = "dashed"
  ) +
  geom_text(
    data = exposurelimits,
    mapping = aes(x = intercept, y = Inf, label = round(intercept, 1)),
    colour = "lightgray",
    vjust = 1
  ) +
  geom_smooth(
    mapping = aes(fill = "logistic fit (95%CI)"),
    method = "glm",
    method.args = list(family = "binomial"),
    colour = "black"
  ) +
  geom_density_ridges(
    data = ICGIlong %>% filter(DOSE != "Placebo"),
    mapping = aes(
      x = expvalue,
      y = keynumeric,
      group = DOSE,
      colour = DOSE,
      height = after_stat(ndensity)
    ),
    rel_min_height = 0.005,
    alpha = 0.1,
    scale = dist_scale,
    quantile_lines = TRUE,
    quantiles = c(0.1, 0.25, 0.5, 0.75, 0.9)
  ) +
  # individual exposures, drawn just under each distribution's baseline
  geom_point(
    data = ICGIlong %>% filter(DOSE != "Placebo"),
    mapping = aes(x = expvalue, y = keynumeric - 0.025, colour = DOSE),
    alpha = 0.2
  ) +
  facet_grid(~expname, scales = "free_x") +
  ggthemes::scale_color_tableau() +
  scale_fill_manual(values = "gray") +
  labs(fill = "") +
  theme_bw() +
  theme(legend.position = "top") +
  guides(
    fill = guide_legend(order = 1),
    color = guide_legend(order = 2, nrow = 2, reverse = TRUE)
  )

Next we compute the probabilities of response by exposure quartiles and by dose levels and add them to the plot using geom_pointrange.

Code

# Summarise exposure (x) and response (y) for the current dplyr group.
#
# Must be called from inside a dplyr data-masking verb (e.g. reframe()),
# because the group size is taken from dplyr::n().
#
# Arguments:
#   x          exposure values.
#   y          response values.
#   probs      exposure quantiles to report; the result has one row per entry.
#   continuous FALSE (default): N is sum(y) and meanresp is N / Ntot, with a
#              binomial standard error. TRUE: N is the group size, meanresp is
#              mean(y) and SE is sd(y) / sqrt(Ntot).
#
# Returns a tibble with columns minexp, maxexp, medexp, meanexp, N, Nmiss,
# Ntot, meanresp, prob (same as meanresp), SE, values (the exposure quantiles)
# and quant (the matching probabilities).
#
# Missing exposures are counted in Nmiss and ignored by the exposure summaries
# (as they already were by the quantiles); a group without any observed
# exposure gets NA summaries.
summary_df <- function(x, y, probs = c(0.1, 0.25, 0.75, 0.9),
                       continuous = FALSE) {
  x_obs <- x[!is.na(x)]
  summarise_obs <- function(f) {
    if (length(x_obs) > 0L) f(x_obs) else NA_real_
  }

  # namespace-qualified so the function also works when dplyr is not attached
  n_total <- dplyr::n()

  if (continuous) {
    n_resp <- n_total
    mean_resp <- mean(y, na.rm = TRUE)
    se_resp <- sd(y, na.rm = TRUE) / sqrt(n_total)
  } else {
    n_resp <- sum(y, na.rm = TRUE)
    mean_resp <- n_resp / n_total
    se_resp <- sqrt(mean_resp * (1 - mean_resp) / n_total)
  }

  tibble::tibble(
    minexp = summarise_obs(min),
    maxexp = summarise_obs(max),
    medexp = summarise_obs(median),
    meanexp = summarise_obs(mean),
    N = n_resp,
    Nmiss = sum(is.na(x)),
    Ntot = n_total,
    meanresp = mean_resp,
    prob = mean_resp,
    SE = se_resp,
    values = quantile(x, probs, na.rm = TRUE),
    quant = probs
  )
}

# Response and exposure summaries per dose level (keynumeric is carried along
# as the y position of each dose), with the exposure quantiles spread into
# quant_<percent> columns.
ICGIlong.summaries.dose <- ICGIlong %>%
  group_by(Endpoint, expname, DOSE, keynumeric) %>%
  reframe(summary_df(x = expvalue, y = response)) %>%
  pivot_wider(
    names_from = quant,
    values_from = values,
    names_glue = "quant_{100*quant}"
  )

# Same summaries per exposure bin (Placebo, Q1-Q4); group_by() replaces any
# grouping already present on ICGIlong.
ICGIlong.summaries.exposure <- ICGIlong %>%
  group_by(Endpoint, expname, exptile) %>%
  reframe(summary_df(x = expvalue, y = response)) %>%
  pivot_wider(
    names_from = quant,
    values_from = values,
    names_glue = "quant_{100*quant}"
  )
  

# Add the observed summaries to the exposure-response plot:
#  - per dose, the 10th-90th and 25th-75th exposure percentile ranges and the
#    median exposure, drawn just above each distribution's baseline;
#  - observed response with +/- 1.96 * SE, by dose and by exposure quartile.
p2 <- p1 +
  geom_linerange(
    data = ICGIlong.summaries.dose,
    mapping = aes(
      x = medexp, xmin = quant_10, xmax = quant_90,
      y = keynumeric + 0.025, colour = DOSE
    ),
    linewidth = 2,
    alpha = 0.4
  ) +
  geom_linerange(
    data = ICGIlong.summaries.dose,
    mapping = aes(
      x = medexp, xmin = quant_25, xmax = quant_75,
      y = keynumeric + 0.025, colour = DOSE
    ),
    linewidth = 2.5,
    alpha = 0.4
  ) +
  geom_point(
    data = ICGIlong.summaries.dose,
    mapping = aes(x = medexp, y = keynumeric + 0.025, colour = DOSE),
    size = 3,
    alpha = 0.2
  ) +
  geom_pointrange(
    data = ICGIlong.summaries.dose,
    mapping = aes(
      x = medexp,
      y = meanresp,
      ymin = meanresp - 1.96 * SE,
      ymax = meanresp + 1.96 * SE,
      colour = DOSE,
      shape = "by dose"
    ),
    alpha = 0.4
  ) +
  geom_pointrange(
    data = ICGIlong.summaries.exposure,
    mapping = aes(
      x = medexp,
      y = meanresp,
      ymin = meanresp - 1.96 * SE,
      ymax = meanresp + 1.96 * SE,
      shape = "by quartile"
    ),
    alpha = 0.4
  ) +
  labs(shape = "observed\nprobability") +
  guides(shape = guide_legend(order = 3, nrow = 2, reverse = TRUE))

Finally, we want to annotate the plot by displaying, for each exptile, the bin limits, the N of responders, the total N of subjects, and the probability of response.

Code

# Annotation text per exposure bin, one item per line: bin name, [min-max]
# exposure range (left empty for Placebo), N, Ntot and the response in percent.
# All labels are placed at y = 0.25.
ICGIlong.summaries.exposure <- ICGIlong.summaries.exposure %>%
  mutate(
    label = ifelse(
      exptile != "Placebo",
      paste0(
        exptile, "\n",
        "[", round(minexp, 0), "-", round(maxexp, 0), "]", "\n",
        N, "\n",
        Ntot, "\n",
        round(100 * meanresp, 1)
      ),
      paste0(
        exptile, "\n",
        "", "\n",
        N, "\n",
        Ntot, "\n",
        round(100 * meanresp, 1)
      )
    ),
    ytext = 0.25
  )

# Add the labels at each bin's mean exposure; facet_wrap() takes over from the
# facet specification set on the earlier plot.
p2txt <- p2 +
  geom_text(
    data = ICGIlong.summaries.exposure,
    mapping = aes(x = meanexp, y = ytext, label = label),
    size = 1.5,
    inherit.aes = FALSE
  ) +
  facet_wrap(~expname, ncol = 2, scales = "free_x")

# One y axis for both parts of the plot: dose labels at the distribution
# positions below zero, percentages for the probability scale above.
p2txt +
  scale_y_continuous(
    breaks = c(sort(unique(ICGIlong$keynumeric)), 0, 0.25, 0.5, 0.75, 1),
    labels = c(levels(ICGIlong$DOSE), "0", "25%", "50%", "75%", "100%"),
    expand = expansion(mult = c(0.01, 0.01), add = c(0, 0))
  ) +
  labs(
    x = "Exposure Values",
    y = "Probability of Endpoint"
  )

In the ggquickeda package, the ggresponseexpdist function automates many of these operations, making it possible to generate these advanced plots with concise syntax.

Code

# Copy of the data with an Endpoint label and a response column holding the
# ICGI values, then the packaged ggquickeda plot on AUC split by quartile.
# NOTE(review): presumably Endpoint/response are the column names that
# ggresponseexpdist() expects - confirm against the ggquickeda documentation.
ICGIERDATA <- ICGI
ICGIERDATA[["Endpoint"]] <- "ICGI"
ICGIERDATA[["response"]] <- ICGIERDATA[["ICGI"]]

ggresponseexpdist(
  data = ICGIERDATA,
  model_type = "logistic",
  exposure_metrics = c("AUC"),
  exposure_metric_split = "quartile",
  N_byexptile_ypos = "with means",
  mean_obs_bydose = TRUE,
  mean_obs_bydose_plac = FALSE,
  N_bydose_ypos = "none",
  mean_obs_bydose_text_size = 0,
  N_text_size = 3,
  exposure_distribution_percent = "%"
)