### WIHS data
### ===========

### File to perform analyzes
### and to create plots
### presented in the paper.
###
### Komarek, A., Lesaffre, E., and Hilton, J. F.
###
### Accelerated Failure Time Model for Arbitrarily
### Censored Data with Smoothed Error Distribution
### ------------------------------------------------

### WARNING: This file only shows how to do it with SIMULATED data
###  * created plots thus are not the same as these in the paper!!!
### ===============================================================

## smoothSurv library is needed:
    library(smoothSurv)

## First we simulate the data:

  ## 2 functions to sample from multinomial and extreme value distributions
rmultinomial <- function(n, probs){
  threshholds <- cumsum(probs)
  threshholds <- c(threshholds, 1)
  u <- runif(n)
  ranks <- order(u)
  u <- u[ranks]
  output <- numeric(n)
  filled <- 1
  for(i in 1:length(threshholds)){
     if(filled == n + 1) break
     while(u[filled] <= threshholds[i] & filled <= n){
        output[filled] <- i
        filled <- filled + 1
     }
  }
  output <- output[ranks]
  return(output)
}

rminim <- function(n, alpha=0, beta=1){
  u <- runif(n)
  return(beta*log(-log(1-u))+alpha)
}

  ## Sample size
n <- 250

  ## Covariates and true AIDS times
lesion <- rbinom(n, 1, 0.4)
logcd4 <- rminim(n, alpha=8.5, beta=1.0)
logvload <- rnorm(n, 4, 1)
epsilon <- 1.6 + 1.4*rnorm(n, 0, 1)
logaids.time <- -0.6*lesion -0.3*logvload +0.4*logcd4 +epsilon
aids.time <- exp(logaids.time)

  ## Censoring
visit1 <- rnorm(n, mean=7, sd=1)
visit1[visit1 <= 0] <- 1
between.visits <- matrix(rnorm(n*145, mean=6, sd=0.5), nrow=n, ncol=145)
between.visits[between.visits <= 0] <- 1
from.first.visit <- t(apply(between.visits, 1, cumsum))
visits <- cbind(visit1, visit1 + from.first.visit)
last.visits <- rmultinomial(n, rep(0.007, floor(1/0.007)))
last.seen <- visits[cbind(1:n, last.visits)]
censor <- rep(3, n)                           ## interval censored observation
censor[last.seen < aids.time] <- 0      ## right censored observation
censor[aids.time <= visit1] <- 2              ## left censored observation

  ## Censored data set
time <- last.seen
time[censor == 2] <- visit1[censor == 2]
time2 <- rep(NA, n)
who.interval <- (1:n)[censor == 3]
for(ii in who.interval){
   j <- 2
   while(aids.time[ii] > visits[ii, j]){
       j <- j + 1
   }
   time[ii] <- visits[ii, j - 1]
   time2[ii] <- visits[ii, j]
}

  ## Put everything to one data.frame
data <- data.frame(id=1:n, aids.left=time, aids.right=time2, logvload=logvload, logcd4=logcd4, lesion=lesion)

  ## Read data:
#    dir <- "~/wihs/"
#    data <- read.table(paste(dir, "wihs.dat", sep = ""), skip = 20, header = TRUE)

  ## Create a 'Surv' object:
    attach(data)
    surv <- Surv(aids.left, aids.right, type = "interval2")

  ## Fit the models:
  ##  (Models (1) to (7) from Table 1)
    lambda <- exp(2:(-9))
    sfit1 <- smoothSurvReg(surv ~ lesion, info = FALSE, difforder = 3, lambda = lambda)
    sfit2 <- smoothSurvReg(surv ~ logvload, info = FALSE, difforder = 3, lambda = lambda)
    sfit3 <- smoothSurvReg(surv ~ logcd4, info = FALSE, difforder = 3, lambda = lambda)
    sfit4 <- smoothSurvReg(surv ~ lesion + logvload, info = FALSE, difforder = 3, lambda = lambda)
    sfit5 <- smoothSurvReg(surv ~ lesion + logcd4, info = FALSE, difforder = 3, lambda = lambda)
    sfit6 <- smoothSurvReg(surv ~ logvload + logcd4, info = FALSE, difforder = 3, lambda = lambda)
    sfit7 <- smoothSurvReg(surv ~ lesion + logvload + logcd4, info = FALSE, difforder = 3, lambda = lambda)

  ## ================================================
  ## Table 1
  ## ================================================
  ## Produce Table 1 shown in the paper:
    sfits <- list(den = sfit1, lvl = sfit2, lcd = sfit3, den.lvl = sfit4, den.lcd = sfit5, lvl.lcd = sfit6, den.lvl.lcd = sfit7)
    aic <- c(NA, length(sfits))
    df <- c(NA, length(sfits))
    loglambda <- c(NA, length(sfits))
    lesion <- c(NA, length(sfits)); lesion.sd <- c(NA, length(sfits)); lesion.p <- c(NA, length(sfits))
    logvload <- c(NA, length(sfits)); logvload.sd <- c(NA, length(sfits)); logvload.p <- c(NA, length(sfits))
    logcd4 <- c(NA, length(sfits)); logcd4.sd <- c(NA, length(sfits)); logcd4.p <- c(NA, length(sfits))

    for(i in 1:length(sfits)){
       aic[i] <- as.numeric(sfits[[i]]$aic)
       df[i] <- as.numeric(sfits[[i]]$degree.smooth$df)
       loglambda[i] <- as.numeric(sfits[[i]]$degree.smooth["Log(Lambda)"])

       lesion[i] <- as.numeric(sfits[[i]]$regres["lesion", "Value"])
       lesion.sd[i] <- as.numeric(sfits[[i]]$regres["lesion", "Std.Error"])
       lesion.p[i] <- 2 * pnorm(-abs(lesion[i]/lesion.sd[i]))

       logvload[i] <- as.numeric(sfits[[i]]$regres["logvload", "Value"])
       logvload.sd[i] <- as.numeric(sfits[[i]]$regres["logvload", "Std.Error"])
       logvload.p[i] <- 2 * pnorm(-abs(logvload[i]/logvload.sd[i]))

       logcd4[i] <- as.numeric(sfits[[i]]$regres["logcd4", "Value"])
       logcd4.sd[i] <- as.numeric(sfits[[i]]$regres["logcd4", "Std.Error"])
       logcd4.p[i] <- 2 * pnorm(-abs(logcd4[i]/logcd4.sd[i]))
    }

    table <- data.frame(aic = round(aic, 2), df = round(df, 1), loglambda,
                lesion = round(lesion, 2), lesion.sd = round(lesion.sd, 2), lesion.p,
                logvload = round(logvload, 2), logvload.sd = round(logvload.sd, 2), logvload.p,
                logcd4 = round(logcd4, 2), logcd4.sd = round(logcd4.sd, 2), logcd4.p)
    row.names(table) <- c("(1) $lesion$", "(2) $lvload$", "(3) $lcd4$",
         "(4) $lesion + lvload$", "(5) $lesion + lcd4$", "(6) $lvload + lcd4$",
         "(7) $lesion + lvload + lcd4$")
    print(table)


  ## ==================================================================
  ## Figures not shown in the paper
  ## * plot always fitted survivor curves, hazard curves and densities of T
  ##   for specific combinations of covariates defined by covar*
  ##   and finally also fitted error density in a log-linear model
  ## ==================================================================
  ## For model lesion
  covar1 <- matrix(c(0, 1), ncol=1)                    ## lesion = 0 vs. lesion = 1
  par(mfrow=c(2, 2))
  sfun1 <- survfit(sfit1, cov=covar1)
  hazfun1 <- hazard(sfit1, cov=covar1)
  densfun1 <- fdensity(sfit1, cov=covar1)
  plot(sfit1)

  ## For model lvload
  covar2 <- matrix(c(3.406, 3.875, 4.750), ncol=1)  ## 3 combinations with 3 values of log(1 + vload)
  par(mfrow=c(2, 2))
  sfun2 <- survfit(sfit2, cov=covar2)
  hazfun2 <- hazard(sfit2, cov=covar2)
  densfun2 <- fdensity(sfit2, cov=covar2)
  plot(sfit2)

  ## For model lcd4
  covar3 <- matrix(c(7.833, 8.735, 9.244), ncol=1)     ## 3 combinations with 3 values of log(1 + cd4)
  par(mfrow=c(2, 2))
  sfun3 <- survfit(sfit3, cov=covar3)
  hazfun3 <- hazard(sfit3, cov=covar3)
  densfun3 <- fdensity(sfit3, cov=covar3)
  plot(sfit3)

  ## For model lesion + lvload
  covar4 <- matrix(c(0, 0, 0, 1, 1, 1, rep(c(3.406, 3.875, 4.750), 2)), ncol=2)  ## 6 combinations of lesion and log(1 + vload)
  par(mfrow=c(2, 2))
  sfun4 <- survfit(sfit4, cov=covar4)
  hazfun4 <- hazard(sfit4, cov=covar4)
  densfun4 <- fdensity(sfit4, cov=covar4)
  plot(sfit4)

  ## For model lesion + lcd4
  covar5 <- matrix(c(0, 0, 0, 1, 1, 1, rep(c(7.833, 8.735, 9.244), 2)), ncol=2)     ## 6 combinations of lesion and log(1 + cd4)
  par(mfrow=c(2, 2))
  sfun5 <- survfit(sfit5, cov=covar5)
  hazfun5 <- hazard(sfit5, cov=covar5)
  densfun5 <- fdensity(sfit5, cov=covar5)
  plot(sfit5)

  ## For model logvload + lcd4
  covar6 <- matrix(c(c(3.406, 3.875, 4.750), c(7.833, 8.735, 9.244)), ncol=2)  ## 3 combinations of log(1 + cd4) and log(1 + vload)
  par(mfrow=c(2, 2))
  sfun6 <- survfit(sfit6, cov=covar6)
  hazfun6 <- hazard(sfit6, cov=covar6)
  densfun6 <- fdensity(sfit6, cov=covar6)
  plot(sfit6)

  ## For model lesion + lvload + lcd4
  covar7 <- matrix(c(0,0,0, 1,1,1, rep(c(3.406, 3.875, 4.750), 2), rep(c(7.833, 8.735, 9.244), 2)), ncol=3)  ## 6 combinations of lesion, log(1 + cd4) and log(1 + vload)
  par(mfrow=c(2, 2))
  sfun7 <- survfit(sfit7, cov=covar7)
  hazfun7 <- hazard(sfit7, cov=covar7)
  densfun7 <- fdensity(sfit7, cov=covar7)
  plot(sfit7)

  covar7a <- matrix(c(0, 1, rep(3.875, 2), rep(8.735, 2)), ncol=3)  ## 2 combinations of lesion, log(1 + cd4) and log(1 + vload)
  par(mfrow=c(2, 2))
  sfun7a <- survfit(sfit7, cov=covar7a)
  hazfun7a <- hazard(sfit7, cov=covar7a)
  densfun7a <- fdensity(sfit7, cov=covar7a)
  plot(sfit7)


  ## =================================================================================================
  ## Figure 3 from the paper 
  ##
  ## * 3 plots for model lesion (2 groups with lesion = 1 and lesion = 0)
  ## * 3 plots for model lesion + lvload + lcd4 (2 groups with lesion = 1 and lesion = 0
  ##       and lvload equal to its median value (3.875), lcd4 equal to its median value (8.735))
  ## * for model lesion, we plot also Turnbull estimates of survivor curves computed in Splus
  ##   and stored on disk, NOT HERE!
  ## =================================================================================================
    ## Read the non-parametric Turnbull estimates for model lesion
#      km1.0 <- read.table(paste(submitdir, "kmm1.0.dat", sep = ""), header = FALSE, sep = ",")
#      km1.1 <- read.table(paste(submitdir, "kmm1.1.dat", sep = ""), header = FALSE, sep = ",")
#      names(km1.0) <- c("grid", "surv")
#      names(km1.1) <- c("grid", "surv")

    ## Compute desired quantities for model lesion
      covar1 <- matrix(c(0, 1), ncol=1)                    ## lesion = 0 vs. lesion = 1
      sfun1 <- survfit(sfit1, cov=covar1, plot=FALSE, xlim=c(0.1, 80), by=0.01)
      hazfun1 <- hazard(sfit1, cov=covar1, plot=FALSE, xlim=c(0.1, 80), by=0.01)
      densfun1 <- fdensity(sfit1, cov=covar1, plot=FALSE, xlim=c(0.1, 80), by=0.01)

    ## Compute desired quantities for model lesion + lvload + lcd4
      covar7a <- matrix(c(0, 1, rep(3.875, 2), rep(8.735, 2)), ncol=3)  ## 2 combinations of lesion, log(1 + cd4) and log(1 + vload)
      sfun7a <- survfit(sfit7, cov=covar7a, plot=FALSE, xlim=c(0.1, 80), by=0.01)
      hazfun7a <- hazard(sfit7, cov=covar7a, plot=FALSE, xlim=c(0.1, 80), by=0.01)
      densfun7a <- fdensity(sfit7, cov=covar7a, plot=FALSE, xlim=c(0.1, 80), by=0.01)

#      cex <- 1.1; cex.main <- 1.1; cex.text <- 1.0
#      mai <- c(1.0, 0.9, 0.4, 0.1)
#      postscript(paste(paperdir, "arnost10.ps", sep=""), horizontal = FALSE, height=12, width=8)
#      par(mfcol=c(3, 2), bty="n", mai=mai, cex=cex, cex.main=cex.main)

      par(mfcol=c(3, 2), bty="n")
      lwd <- 2; cex.text <- 1
      main <- c("lesion", "lesion + lvload + lcd4")

    ## Model lesion
      plot(sfun1$x, sfun1$y1, lty=1, type="l", xlab="Time (months)", ylab="Survivor", xlim=c(0, 80), ylim=c(0, 1), lwd=lwd)
      lines(sfun1$x, sfun1$y2, lty=4, lwd=lwd)
 
#      lines(km1.0$grid, km1.0$surv, lty=2, lwd=lwd)
#      lines(km1.1$grid, km1.1$surv, lty=2, lwd=lwd)
      text(55, 0.87, "lesion = 0", cex = cex.text)
      text(30, 0.56, "lesion = 1", cex = cex.text)
 #     legend(0, 0.2, legend="Turnbull", lty=2, lwd=lwd, bty="n")
      title(main=main[1])

      plot(hazfun1$x, hazfun1$y2, type="l", lty=4, xlab="Time (months)", ylab="Hazard", xlim=c(0, 80), lwd=lwd)
      lines(hazfun1$x, hazfun1$y1, lty=1, lwd=lwd)
      text(20, 0.008, "lesion = 0", cex=cex.text)
      text(30, 0.025, "lesion = 1", cex=cex.text)

      plot(densfun1$x, densfun1$y2, type="l", lty=4, xlab="Time (months)", ylab="Density", xlim=c(0, 80), lwd=lwd)
      lines(densfun1$x, densfun1$y1, lty=1, lwd=lwd)
      text(20, 0.006, "lesion = 0", cex=cex.text)
      text(30, 0.020, "lesion = 1", cex=cex.text)

    ## Model lesion + lvload + lcd4
      plot(sfun7a$x, sfun7a$y1, type="l", lty=1, xlab="Time (months)", ylab="", xlim=c(0, 80), ylim=c(0, 1), lwd=lwd)
      lines(sfun7a$x, sfun7a$y2, lty=4, lwd=lwd)
      text(57, 0.89, "lesion = 0", cex=cex.text)
      text(32, 0.59, "lesion = 1", cex=cex.text)
      text(0, 0.25, "lvload = 3.875", cex=cex.text, pos=4)
      text(0, 0.1, "lcd4 = 8.735", cex=cex.text, pos=4)
      title(main=main[2])

      plot(hazfun7a$x, hazfun7a$y2, type="l", lty=4, xlab="Time (months)", ylab="",
           xlim=c(0, 80), lwd=lwd)
      lines(hazfun7a$x, hazfun7a$y1, lty=1, lwd=lwd)
      text(20, 0.001, "lesion = 0", cex=cex.text)
      text(35, 0.017, "lesion = 1", cex=cex.text)
      text(37, 0.03, "lvload = 3.875", cex=cex.text, pos=4)
      text(37, 0.025, "lcd4 = 8.735", cex=cex.text, pos=4)

      plot(densfun7a$x, densfun7a$y2, type="l", lty=4, xlab="Time (months)", ylab="",
           xlim=c(0, 80), lwd=lwd)
      lines(densfun7a$x, densfun7a$y1, lty=1, lwd=lwd)
      text(23, 0.001, "lesion = 0", cex=cex.text)
      text(35, 0.015, "lesion = 1", cex=cex.text)
      text(37, 0.03, "lvload = 3.875", cex=cex.text, pos=4)
      text(37, 0.025, "lcd4 = 8.735", cex=cex.text, pos=4)

#      dev.off()


  ## ======================================================================
  ## Figure not shown in the paper: fitted error densities for the models
  ##   compared to three parametric densities
  ## ======================================================================
    ## Compute fitted error distributions using 'plot.smoothSurvReg' function:
    dplot <- lapply(sfits, plot, plot=FALSE, xlim=c(-4, 4))

    ## Compute three reference parametric densities (normal, logistic, extreme value)
      grid <- dplot[[1]]$x
      y.norm <- dnorm(grid)
      y.logis <- dstlogis(grid)
      y.extreme <- dstextreme(grid)

    ## Main titles for plots:
      main <- c("(1) lesion", "(2) lvload", "(3) lcd4",
                "(4) lesion + lvload", "(5) lesion + lcd4", "(6) lvload + lcd4",
                "(7) lesion + lvload + lcd4")

    ## Plotting parameters:
#      mai <- c(0.7, 1, 0.5, 0.15)
#      cex <- 1.3; cex.text <- 1.3; cex.legend <- 1.3; cex.main <- 1.5
       cex <- 1; cex.tex <- 1.3; cex.legend <- 1.3; cex.main <- 1.5

    ## Plot:
#      postscript(paste(submitdir, "arnost6.ps", sep = ""), horizontal = FALSE)
#      par(mfrow = c(4, 2), bty = "n", mai = mai, cex.main = cex.main, cex.lab = cex, cex.sub = cex, cex.axis = cex)
      par(mfrow=c(4, 2), bty="n", cex.main=cex.main, cex.lab=cex, cex.sub=cex, cex.axis=cex)

        for (i in 1:length(sfits)){
          plot(dplot[[i]]$x, dplot[[i]]$y, type = "l", lty = 1, xlab = expression(epsilon), ylab = expression(f(epsilon)))
          lines(grid, y.norm, lty = 2)
          lines(grid, y.logis, lty = 3)
          lines(grid, y.extreme, lty = 4)
          # legend(-4, 0.8, c("Fitted", "Normal", "Logistic", "Extreme val."), lty = 1:4, cex = cex.legend, bty = "n")
          title(main = main[i])
        }
      
#      dev.off()



