% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/EigenStrat_main.R
\name{eigenstrat}
\alias{eigenstrat}
\title{EIGENSTRAT for correcting for population stratification}
\usage{
eigenstrat(
  genoFile,
  outFile.Robj = "out.list",
  outFile.txt = "out.txt",
  rm.marker.index = NULL,
  rm.subject.index = NULL,
  miss.val = 9,
  num.splits = 10,
  topK = NULL,
  signt.eigen.level = 0.01,
  signal.outlier = FALSE,
  iter.outlier = 5,
  sigma.thresh = 6
)
}
\arguments{
\item{genoFile}{a txt file containing the genotypes (0, 1, 2, or
9). The element of the file in Row \emph{i} and Column \emph{j}
represents the genotype at the \emph{i}th marker of the \emph{j}th
subject. 0, 1, and 2 denote the number of risk alleles, and 9
(default) is for the missing genotype.}

\item{outFile.Robj}{the name of an R object for saving the list of
the results which is the same as the return value of this
function. The default is "\code{out.list}".}

\item{outFile.txt}{a txt file for saving the eigenvectors
corresponding to the top significant eigenvalues. The default is
"\code{out.txt}".}

\item{rm.marker.index}{a numeric vector for the indices of the
removed markers. The default is \code{NULL}.}

\item{rm.subject.index}{a numeric vector for the indices of the
removed subjects. The default is \code{NULL}.}

\item{miss.val}{the number representing the missing data in the
input data. The default is \code{9}. If \code{miss.val} is set to a
different value, the missing genotypes in the \code{genoFile} must be
coded with that value instead of 9.}

\item{num.splits}{the number of groups into which the markers are
split. The default is \code{10}.}

\item{topK}{the number of eigenvectors to return. If \code{NULL}, it is
calculated by the Tracy-Widom test. The default is \code{NULL}.}

\item{signt.eigen.level}{a numeric value which is the significance
level of the Tracy-Widom test. It should be \code{0.05}, \code{0.01}, \code{0.005}, or
\code{0.001}. The default is \code{0.01}.}

\item{signal.outlier}{logical. If \code{TRUE}, delete the outliers of the
subjects; otherwise, do not search for the outliers. The default
is \code{FALSE}.}

\item{iter.outlier}{a numeric value that is the number of iterations for
finding the outliers of the subjects. The default is \code{5}.}

\item{sigma.thresh}{a numeric value that is the lower limit for
eliminating the outliers. The default is \code{6}.}
}
\value{
\code{eigenstrat} returns a list, which contains the following components:
\tabular{llll}{
\code{num.markers} \tab \tab \tab the number of markers excluding the removed markers.\cr
\code{num.subjects} \tab \tab \tab the number of subjects excluding the outliers.\cr
\code{rm.marker.index} \tab \tab \tab the indices of the removed markers.\cr
\code{rm.subject.index} \tab \tab \tab the indices of the removed subjects.\cr
\code{TW.level} \tab \tab \tab the significance level of the Tracy-Widom test.\cr
\code{signal.outlier} \tab \tab \tab dealing with the outliers in the subjects or not.\cr
\code{iter.outlier} \tab \tab \tab the number of iterations for finding the outliers.\cr
\code{sigma.thresh} \tab \tab \tab the lower limit for eliminating the outliers.\cr
\code{num.outliers} \tab \tab \tab the number of outliers.\cr
\code{outliers.index} \tab \tab \tab the indices of the outliers.\cr
\code{num.used.subjects} \tab \tab \tab the number of the used subjects.\cr
\code{used.subjects.index} \tab \tab \tab the indices of the used subjects.\cr
\code{similarity.matrix} \tab \tab \tab the similarity matrix among the subjects.\cr
\code{eigenvalues} \tab \tab \tab the eigenvalues of the similarity matrix.\cr
\code{eigenvectors} \tab \tab \tab the eigenvectors corresponding to the eigenvalues.\cr
\code{topK} \tab \tab \tab the number of significant eigenvalues.\cr
\code{TW.stat} \tab \tab \tab the observed values of the Tracy-Widom statistics.\cr
\code{topK.eigenvalues} \tab \tab \tab the top eigenvalues.\cr
\code{topK.eigenvectors} \tab \tab \tab the eigenvectors corresponding to the top eigenvalues.\cr
\code{runtime} \tab \tab \tab the running time of this function.
}
}
\description{
Find the eigenvectors of the similarity matrix among the subjects
used for correcting for population stratification in the
population-based genetic association studies.
}
\details{
Suppose that a total of \emph{n} cases and controls are randomly
enrolled in the source population and a panel of \emph{m}
single-nucleotide polymorphisms are genotyped. The genotype at a
marker locus is coded as 0, 1, or 2, with the value corresponding
to the copy number of risk alleles. All the genotypes are given in
the form of an \emph{m*n} matrix, in which the element in the
\emph{i}th row and the \emph{j}th column represents the genotype
of the \emph{j}th subject at the \emph{i}th marker. This function
calculates the top eigenvectors or the eigenvectors with
significant eigenvalues of the similarity matrix among the
subjects to infer the potential population structure. See also
\code{\link{tw}}.
}
\examples{
# Simulate genotypes coded 0, 1, or 2 and arrange them in a matrix with
# 30 columns (one column per subject, one row per marker).
eigenstratG.eg <- matrix(rbinom(3000, 2, 0.5), ncol = 30)

# Keep every file touched by this example inside the session's temporary
# directory, so running the example never writes to the working directory.
geno.path <- file.path(tempdir(), "eigenstratG.eg.txt")
robj.path <- file.path(tempdir(), "eigenstrat.result.list")
txt.path <- file.path(tempdir(), "eigenstrat.result.txt")

# sep = "" writes the genotypes of each row back-to-back with no delimiter.
write.table(eigenstratG.eg, file = geno.path, quote = FALSE,
            sep = "", row.names = FALSE, col.names = FALSE)

# All remaining arguments are spelled out at their default values.
eigenstrat(genoFile = geno.path, outFile.Robj = robj.path,
             outFile.txt = txt.path, rm.marker.index = NULL,
             rm.subject.index = NULL, miss.val = 9, num.splits = 10,
             topK = NULL, signt.eigen.level = 0.01, signal.outlier = FALSE,
             iter.outlier = 5, sigma.thresh = 6)

# Clean up the input file and both output files.
file.remove(geno.path, robj.path, txt.path)
}
\references{
Lin Wang, Wei Zhang, and Qizhai Li. AssocTests: An R Package
for Genetic Association Studies. \emph{Journal of Statistical Software}.
2020; 94(5): 1-26. \doi{10.18637/jss.v094.i05}.

AL Price, NJ Patterson, RM Plenge, ME Weinblatt, NA
Shadick, and D Reich. Principal Components Analysis Corrects for
Stratification in Genome-Wide Association Studies. \emph{Nature
Genetics}. 2006; 38(8): 904-909.

N Patterson, AL Price, and D Reich. Population
Structure and Eigenanalysis. \emph{PLoS Genetics}. 2006; 2(12):
2074-2093.

CA Tracy and H Widom. Level-Spacing Distributions and
the Airy Kernel. \emph{Communications in Mathematical
Physics}. 1994; 159(1): 151-174.
}
\author{
Lin Wang, Wei Zhang, and Qizhai Li.
}
