\name{BSWiMS.model}
\alias{BSWiMS.model}
\title{BSWiMS model selection}
\description{
	This function returns a set of models that best predict the outcome, based on the Bootstrap Stage-Wise Model Selection (B:SWiMS) algorithm.
}
\usage{
	BSWiMS.model(formula,
	            data,
	            type = c("Auto","LM","LOGIT","COX"),
	            testType = c("Auto","zIDI",
	                         "zNRI",
	                         "Binomial",
	                         "Wilcox",
	                         "tStudent",
	                         "Ftest"),
	            pvalue=0.05,
	            variableList=NULL,
	            size=0,
	            loops=20,
	            elimination.bootstrap.steps = 200,
	            fraction=1.0,
	            maxTrainModelSize=20,
	            maxCycles=20,
	            print=FALSE,
	            plots=FALSE,
	            featureSize=0,
	            NumberofRepeats=1,
	            bagPredictType=c("Bag","wNN","Ens")
	            )
}
\arguments{
	\item{formula}{
		An object of class \code{formula} with the formula to be fitted
	}
	\item{data}{
		A data frame where all variables are stored in different columns
	}
	\item{type}{
		The fit type. Auto will determine the fitting based on the formula
	}
	\item{testType}{
		For an IDI-based optimization, the type of index to be evaluated by the \code{improveProb} function (\code{Hmisc} package): \emph{z}-value of IDI ("zIDI") or of NRI ("zNRI"). For a NeRI-based optimization, the type of test to be evaluated by the \code{improvedResiduals} function: Binomial test ("Binomial"), Wilcoxon rank-sum test ("Wilcox"), Student's \emph{t}-test ("tStudent"), or \emph{F}-test ("Ftest")
	}
	\item{pvalue}{
		The maximum \emph{p}-value, associated to the \code{testType}, allowed for a term in the model (it will control the false selection rate)
	}
	\item{variableList}{
		A data frame with two columns. The first one must have the names of the candidate variables and the other one the description of such variables
	}
	\item{size}{
		The number of candidate variables to be tested (the first \code{size} variables from \code{variableList})
	}
	\item{loops}{
		The number of bootstrap loops for the forward selection procedure
	}
	\item{elimination.bootstrap.steps}{
		The number of bootstrap loops for the backwards elimination procedure
	}
	\item{fraction}{
		The fraction of data (sampled with replacement) to be used as the training set
	}
	\item{maxTrainModelSize}{
		Maximum number of terms that can be included in each forward selection model
	}
	\item{maxCycles}{
		The maximum number of model generation cycles 
	}
	\item{print}{
		Logical. If \code{TRUE}, information will be displayed
	}
	\item{plots}{
		Logical. If \code{TRUE}, plots are displayed
	}
	\item{featureSize}{
		The original number of features to be explored in the data frame.
	}
	\item{NumberofRepeats}{
		How many times the BSWiMS search will be repeated
	}
	\item{bagPredictType}{
	    Type of prediction of the bagged formulas
	}
}
\details{
This is a core function of FRESA.CAD. The function will generate a set of B:SWiMS models from the data based on the provided baseline formula. The function loops, each time extracting a model whose terms are all statistically significant. After each loop it removes the significant terms and repeats the model generation until no more significant models are found or the maximum number of cycles is reached.
}
\value{
	\item{BSWiMS.model}{
		the output of the bootstrap backwards elimination step
	}
	\item{forward.model}{
		The output of the forward selection step
	}
	\item{update.model}{
		The output of the model update step
	}
	\item{univariate}{
		The univariate ranking of variables if no list of features was provided
	}
	\item{bagging}{
		The model after bagging the set of models
	}
	\item{formula.list}{
		The formulas extracted at each cycle
	}
	\item{forward.selection.list}{
		All formulas generated by the forward selection procedure
	}
	\item{oridinalModels}{
		A list with the scores, the data and a vector of formulas required for ordinal score predictions
	}
}
\references{Pencina, M. J., D'Agostino, R. B., & Vasan, R. S. (2008). Evaluating the added predictive ability of a new marker: from area under the ROC curve to reclassification and beyond. \emph{Statistics in medicine} \bold{27}(2), 157-172.}

\examples{
	\dontrun{

		# Surv() is provided by the survival package
		library(survival)

		# Start the graphics device driver to save all plots in a pdf format
		pdf(file = "BSWiMS.model.Example.pdf",width = 8, height = 6)

		# Get the stage C prostate cancer data from the rpart package
		data(stagec,package = "rpart")

		# Keep the rows with missing values when building the model matrices.
		# The previous option value is saved so it can be restored at the end
		oldOptions <- options(na.action = 'na.pass')

		# Build a data frame with the outcome columns plus all the main
		# effects and pairwise interactions (the intercept column is dropped)
		stagec_mat <- cbind(pgstat = stagec$pgstat,
		                    pgtime = stagec$pgtime,
		                    as.data.frame(model.matrix(Surv(pgtime,pgstat) ~ .*.,stagec))[-1])

		# Replace the ":" of the interaction column names with "__"
		fnames <- colnames(stagec_mat)
		fnames <- gsub(":","__",fnames,fixed = TRUE)
		colnames(stagec_mat) <- fnames

		dataCancerImputed <- nearestNeighborImpute(stagec_mat)

		# Get a Cox proportional hazards model using:
		# - The default parameters
		md <- BSWiMS.model(formula = Surv(pgtime, pgstat) ~ 1,
		                   data = dataCancerImputed)

		# Plot the bootstrap validation
		pt <- plot(md$BSWiMS.model$bootCV)

		# Get the coefficients summary
		sm <- summary(md)
		print(sm$coefficients)

		# Plot the bagged model
		pl <- plotModels.ROC(cbind(dataCancerImputed$pgstat,
		                           predict(md,dataCancerImputed)),
		                     main = "Bagging Predictions")


		# Get a Cox proportional hazards model using:
		# - The default parameters but repeated 10 times
		md <- BSWiMS.model(formula = Surv(pgtime, pgstat) ~ 1,
		                   data = dataCancerImputed,
		                   NumberofRepeats = 10)

		# Get the coefficients summary
		sm <- summary(md)
		print(sm$coefficients)

		# Check all the formulas
		print(md$formula.list)

		# Plot the bagged model
		pl <- plotModels.ROC(cbind(dataCancerImputed$pgstat,
		                           predict(md,dataCancerImputed)),
		                     main = "Bagging Predictions")


		# Get a regression of the (log) survival time

		timeSubjects <- dataCancerImputed
		timeSubjects$pgtime <- log(timeSubjects$pgtime)

		md <- BSWiMS.model(formula = pgtime ~ 1,
		                   data = timeSubjects)
		pt <- plot(md$BSWiMS.model$bootCV)
		sm <- summary(md)
		print(sm$coefficients)

		# Get a logistic regression model using
		# - The default parameters and removing time as possible predictor
		data(stagec,package = "rpart")
		stagec$pgtime <- NULL
		stagec_mat <- cbind(pgstat = stagec$pgstat,
		                    as.data.frame(model.matrix(pgstat ~ .*.,stagec))[-1])
		fnames <- colnames(stagec_mat)
		fnames <- gsub(":","__",fnames,fixed = TRUE)
		colnames(stagec_mat) <- fnames
		dataCancerImputed <- nearestNeighborImpute(stagec_mat)


		md <- BSWiMS.model(formula = pgstat ~ 1,
		                   data = dataCancerImputed)

		pt <- plot(md$BSWiMS.model$bootCV)
		sm <- summary(md)
		print(sm$coefficients)


		# Get an ordinal regression model of the grade using the GBSG2 data
		# - The default parameters and removing the
		# time and status as possible predictors

		data("GBSG2", package = "TH.data")

		# Prepare the model frame for prediction
		GBSG2$time <- NULL
		GBSG2$cens <- NULL
		GBSG2_mat <- cbind(tgrade = as.numeric(GBSG2$tgrade),
		                   as.data.frame(model.matrix(tgrade~.*.,GBSG2))[-1])

		fnames <- colnames(GBSG2_mat)
		fnames <- gsub(":","__",fnames,fixed = TRUE)
		colnames(GBSG2_mat) <- fnames

		md <- BSWiMS.model(formula = tgrade ~ 1,
		                   data = GBSG2_mat)

		sm <- summary(md$oridinalModels$theBaggedModels[[1]]$bagged.model)
		print(sm$coefficients)
		sm <- summary(md$oridinalModels$theBaggedModels[[2]]$bagged.model)
		print(sm$coefficients)

		# Cross-tabulate the observed grade against the predicted one
		print(table(GBSG2_mat$tgrade,predict(md,GBSG2_mat)))

		# Restore the na.action option that was changed at the start
		options(oldOptions)

		# Shut down the graphics device driver
		dev.off()

	}
}
\author{Jose G. Tamez-Pena}

\keyword{Model_Generation}