###########################################################################
# Replication test
#
# Description:
# This test verifies that aroma.affymetrix can reproduce the estimates
# of the MAT (Model-based Analysis of Tiling arrays) algorithm.
#
# Author: Mark Robinson and Henrik Bengtsson
# Created: 2008-12-09
# Last modified: 2012-09-02
###########################################################################
library("aroma.affymetrix")
library("matrixStats"); # colMedians()
library("limma");  # makeContrast()
verbose <- Arguments$getVerbose(-20, timestamp=TRUE)


# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
# Data set
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
dataSet <- "GSE24546"
tags <- "testset"
chipType <- "Hs_PromPR_v02"
sampleNamesMap <- c(
  GSM605951="Prec1_MeDNA_Input1",
  GSM605952="Prec1_MeDNA_IP2",
  GSM605953="Prec1_MeDNA_IP1"
)

cdf <- AffymetrixCdfFile$byChipType(chipType)
print(cdf)

csR <- AffymetrixCelSet$byName(dataSet, tags=tags, cdf=cdf)
print(csR)


# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
# Normalize the data using the MAT model
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
mn <- MatNormalization(csR)
csM <- process(mn, verbose=more(verbose, 3))
print(csM)


# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
# Convert data set such that it maps to the "unique" CDF
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
csU <- convertToUnique(csM, verbose=verbose)
print(csU)

# Rename
setFullNamesTranslator(csU, function(names, ...) { sampleNamesMap[names] })


# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
# MatSmoothing: Design #1
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
sampleNames <- getNames(csU)

design1 <- makeContrasts(Prec1_MeDNA_IP1-Prec1_MeDNA_Input1, levels=sampleNames)
colnames(design1) <- "Prec1_IP1_minus_Input"
print(design1)

ms1 <- MatSmoothing(csU, design=design1, probeWindow=600, tag="singleIP")
csMS1 <- process(ms1, units=NULL, verbose=verbose)
print(csMS1)


# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
# MatSmoothing: Design #2
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
sampleNames <- getNames(csU)

design2 <- makeContrasts(Prec1_MeDNA_IP1 + Prec1_MeDNA_IP2-Prec1_MeDNA_Input1, levels=sampleNames)
colnames(design2) <- "Prec1_IPs_minus_Input"

ms2 <- MatSmoothing(csU, design=design2, probeWindow=800, tag="multipleIP")
csMS2 <- process(ms2, units=NULL,verbose=verbose)
print(csMS2)


# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
# Compare to precomputed estimates from external MAT software
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
# Compare common units with prefix "chr1F".
cdfU <- getCdf(csU)
units <- indexOf(cdfU, "^chr1F")
cells <- getCellIndices(cdfU, units=units, stratifyBy="pm",
                        unlist=TRUE, useNames=FALSE)

# Get the chromosomal positions of these cells
acp <- AromaCellPositionFile$byChipType(getChipType(cdfU))
pos <- acp[cells,2,drop=TRUE]

# Order cells by chromsomal position
o <- order(pos)
pos <- pos[o]
cells <- cells[o]

# Extract the corresponding signals
Y1 <- extractMatrix(csMS1, cells=cells, verbose=verbose)
Y2 <- extractMatrix(csMS2, cells=cells, verbose=verbose)
Y <- cbind(Y1, Y2)
# Compare on the log2 scale
Y <- log2(Y)


# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
# Loat external signals generated by the MAT software
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
# files with native-MAT summarized signals
summaryFiles <- c("Prec1_MeDNA_600_IP1-Input.bar.txt",
                  "Prec1_MeDNA_800_IPs-Input.bar.txt")
barList <- lapply(summaryFiles, FUN=function(filename) {
  columnNames <- c("chromosome", "position", "signal")
  TabularTextFile(filename, path=getPath(csR), columnNames=columnNames)
})
print(barList)

YMAT <- sapply(barList, FUN=function(bar) {
  columnNames <- c("chromosome", "position", "signal")
  data <- readDataFrame(bar, colClasses=colClasses("cin"), nrow=435000)

  # Extract the subset available in the aroma.affymetrix estimate
  data <- subset(data, chromosome == "chr1" & position %in% pos)

  # Order by position
  o <- order(data$position)
  data <- data[o,,drop=FALSE]

  # Extract the external estimates
  y <- data$signal

  # External score that are exactly zero, they are NAs
  y[y == 0] <- NA

  y
})


# Sanity check
stopifnot(identical(dim(YMAT), dim(Y)))
colnames(YMAT) <- colnames(Y)

# Graphical comparison
designNames <- colnames(Y)
for (key in designNames) {
  toPNG(getFullName(csU), tags=c(key, "MatSmoothing_vs_MAT"), {
    xlab <- expression(log[2](y[MAT]))
    ylab <- expression(log[2](y))
    plot(YMAT[,key], Y[,key], pch=".", xlab=xlab, ylab=ylab)
    abline(a=0, b=1, col="red", lwd=2, lty=3)
    stext(side=3, pos=0, key)
    stext(side=3, pos=1, "Chr01")
    stext(side=4, pos=1, sprintf("n=%d", nrow(Y)))
    stext(side=4, pos=0, getFullName(csU))
  })
}


# Numerical comparison
D2 <- (Y - YMAT)^2
avgDiff <- colMedians(D2, na.rm=TRUE)
names(avgDiff) <- colnames(D2)
cat("Average differences:\n")
print(avgDiff)

# Sanity check
stopifnot(all(avgDiff < 0.003))


print(sessionInfo())
