###########################################################################
# 
# This script contains a demonstration of the functionality in the
# 'kopls' package using a simulated data set. The data set is
# represented by 1000 spectral variables from two different classes
# and is available in the attached 'koplsExample' data set. The
# demonstration essentially consists of two main steps.
#
# The first step is to demonstrate how K-OPLS handles the
# model evaluation (using cross-validation), model building and
# subsequent classification of external data from a non-linear data
# set. The second step is to demonstrate how K-OPLS works in the
# presence of response-independent (Y-orthogonal) variation, using
# the same data set but with a strong systematic class-specific
# disturbance added.
#
# ** THE 'koplsExample' DATA SET
# The 'koplsExample' data set contains the following objects:
#   Xtr = The training data matrix, with 400 observations and
#       1000 spectral variables.
#   Xte = The test data matrix, with 400 observations and
#       1000 spectral variables.
#   Xtro = Same data as 'Xtr', but with class-specific systematic
#       noise added.
#   Xteo = Same data as 'Xte', but with class-specific systematic
#       noise added.
#   Ytr = A binary matrix of class assignments for the training data.
#   Yte = A binary matrix of class assignments for the test data.
#   pch.vec = A vector with character indices (for plotting)
#   col.vec = A vector with colors (for plotting)
#
#
# ** INSTRUCTIONS
# 1) library(kopls)
# 2) demo(koplsDemo)
#
###########################################################################
#
# Authors: Mattias Rantalainen, Imperial College and 
#   Max Bylesjo, Umea University
# Copyright (c) 2007-2008 Mattias Rantalainen and Max Bylesjo
#
###########################################################################
#
# This file is part of the K-OPLS package.
#
# The K-OPLS package is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License version 2
# as published by the Free Software Foundation.
#
# The K-OPLS package is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
###########################################################################


## Attach the K-OPLS package and load the example data set
library(kopls)
data(koplsExample)

######## START OF DEMO

## Run Principal Component Analysis (PCA) on the simulated training data.
## Only the first two left and right singular vectors are requested.
svd.res <- svd(Xtr, nu = 2, nv = 2)

## Plot the first two PCA score vectors to demonstrate data set properties.
## dev.new() opens a new graphics device in a platform-independent way;
## a hard-coded x11() fails on systems without X11 support.
dev.new()
plot(
  svd.res$u[, 1], svd.res$u[, 2],
  col = col.vec, pch = pch.vec,
  xlab = "PC1", ylab = "PC2",
  main = "PCA of original data set"
)

################ K-OPLS modeling

## Define the kernel function parameter.
## This sets the parameter of the Gaussian kernel to 25. The same value is
## reused for every kernel built in this demo.
sigma <- 25

## Construct the training kernel (training data against itself)
Ktr <- koplsKernel(Xtr, NULL, "g", sigma)

## Find the optimal number of Y-orthogonal components by cross-validation.
## This step performs the CV.
modelCV <- koplsCV(
  Ktr, Ytr, 1, 3,
  nrcv = 7, cvType = "nfold",
  preProcK = "mc", preProcY = "mc",
  modelType = "da"
)

## Plot cross-validation results.
## dev.new() is used instead of x11() so that the demo also runs on
## platforms without X11 support.
dev.new()
koplsPlotCVDiagnostics(modelCV)
title("Statistics from K-OPLS cross-validation of original data")

## Plot sensitivity and specificity measures.
## The return value is captured in 'a' (presumably so that it is not
## echoed to the console while the demo runs); it is not used afterwards.
dev.new()
a <- koplsPlotSensSpec(modelCV)
title("Sens. and spec. from K-OPLS cross-validation, original data")


## 'nox' defines the number of Y-orthogonal components used in the final
## model. This value is selected according to the cross-validation
## results. We pick nox=1 for diagnostics, although the model with
## nox=0 is more or less equivalent.
nox <- 1

## Create the test kernels, using the same kernel function and parameter
## as for the training kernel, but now involving the test set matrix:
##   KteTr = test data against training data
##   KteTe = test data against itself
KteTr <- koplsKernel(Xte, Xtr, "g", sigma)
KteTe <- koplsKernel(Xte, NULL, "g", sigma)

## Construct final model
modelOrg <- koplsModel(Ktr, Ytr, 1, nox, "mc", "mc")

## Predict test set
modelOrgPred <- koplsPredict(KteTr, KteTe, Ktr, modelOrg, rescaleY = TRUE)

## View scores from the final model
dev.new()
koplsPlotScores(modelOrg, col = col.vec, pch = pch.vec)
title("Scatter plot matrix of scores")

## View predictions for external test set
dev.new()
plot(
  modelOrgPred$Yhat, Yte,
  xlab = "Predicted", ylab = "Observed",
  main = "Obs. vs. pred. for original data"
) ## Perfect class separation
## Dashed red line marks the 0.5 threshold on the predicted axis
abline(v = 0.5, lty = 2, col = "red")

################### Now model data with Y-orthogonal variation added (to one class)

## Run Principal Component Analysis (PCA) on the simulated training data
## with one class distorted. Only the first two singular vectors are
## requested.
svd.reso <- svd(Xtro, nu = 2, nv = 2)

## Plot PCA score vectors to demonstrate data set properties.
## dev.new() is used instead of x11() so that the demo also runs on
## platforms without X11 support.
dev.new()
plot(
  svd.reso$u[, 1], svd.reso$u[, 2],
  col = col.vec, pch = pch.vec,
  xlab = "PC1", ylab = "PC2",
  main = "PCA of data with Y-ortho variation added"
)

## Create training and test kernels from the disturbed data, using the
## kernel parameter 'sigma' and component count 'nox' defined earlier in
## this demo:
##   Ktro   = training data against itself
##   KteTro = test data against training data
##   KteTeo = test data against itself
Ktro <- koplsKernel(Xtro, NULL, "g", sigma)
KteTro <- koplsKernel(Xteo, Xtro, "g", sigma)
KteTeo <- koplsKernel(Xteo, NULL, "g", sigma)

## Model and predict
modelOSC <- koplsModel(Ktro, Ytr, 1, nox, "mc", "mc")
modelOSCPred <- koplsPredict(KteTro, KteTeo, Ktro, modelOSC, rescaleY = TRUE)

## We skip cross-validation this time (for speed)
dev.new()
koplsPlotModelDiagnostics(modelOSC)
title("Model diagnostics of data (Y-ortho variation added)")


## Plot the scores. Note bimodality in predictive component and
## 'trimodality' in first Y-ortho component.
dev.new()
koplsPlotScores(modelOSC, col = col.vec, pch = pch.vec)
title("Scatter plot matrix of scores (Y-ortho variation added)")

## View predictions for external test set
dev.new()
plot(
  modelOSCPred$Yhat, Yte,
  xlab = "Predicted", ylab = "Observed",
  main = "Obs. vs. pred. with Y-ortho variation added"
) ## Perfect class separation
## Dashed red line marks the 0.5 threshold on the predicted axis
abline(v = 0.5, lty = 2, col = "red")

######## END OF DEMO

