% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/Preprocessing.R
\name{prepareGeno}
\alias{prepareGeno}
\title{Prepare genomic input}
\usage{
prepareGeno(
  fileName,
  outputFile,
  saveGDS,
  mafThresh = NULL,
  missingnessThresh = NULL,
  ldThresh = NULL,
  mgfThresh = NULL,
  directory = NULL,
  interactiveChecks = FALSE,
  verbose = FALSE
)
}
\arguments{
\item{fileName}{char Name of the input file (must be in the active directory). Can be .gds, .ped, .bed or .vcf. If different from .gds, a GDS file (SNPRelate-specific format) will be created unless no filtering options are chosen.}

\item{outputFile}{char Name of the output file. Must be a .csv}

\item{saveGDS}{logical If TRUE (and if the input file extension is different from GDS) the GDS file will be saved. We recommend setting this parameter to TRUE to save time in subsequent functions that rely on the GDS file.}

\item{mafThresh}{double A number between 0 and 1 specifying the Minor Allele Frequency (MAF) filtering (if NULL, no filtering on MAF will be computed)}

\item{missingnessThresh}{double A number between 0 and 1 specifying the missing rate filtering (if null no filtering on missing rate will be computed)}

\item{ldThresh}{double A number between 0 and 1 specifying the linkage disequilibrium (LD) rate filtering (if null no filtering on LD will be computed)}

\item{mgfThresh}{double A number between 0 and 1 specifying the Major Genotype Frequency (MGF) rate filtering (if null no filtering on MGF will be computed). NB: sambada computations rely on genotypes. NB2: The code is written in C++ and needs to be compiled on your computer, therefore Rtools is needed if this parameter is not null.}

\item{directory}{char The directory where the binaries of sambada are saved. This parameter is not necessary if the directory path is permanently stored in the PATH environment variable or if a function invoking the sambada executable (\code{prepareGeno} or \code{sambadaParallel}) has already been run in the active R session.}

\item{interactiveChecks}{logical If TRUE, plots showing the distribution of allele frequency etc. will be displayed, and the user can interactively change the chosen thresholds for \code{mafThresh}, \code{missingnessThresh}, \code{mgfThresh} (optional, default value=FALSE)}

\item{verbose}{logical Turn on verbose mode}
}
\value{
None
}
\description{
Writes a new genomic file that sambada can work with after having applied the selected genomic filtering options. For this function you need SamBada to be installed on your computer; if this is not already the case, you can install it with \code{downloadSambada()} (Mac users, please read the details in the documentation of \code{downloadSambada}). The output is written to \code{outputFile}, which must have a .csv extension.
}
\examples{
# Examples using the data shipped with the package.
# You first need to download sambada (see downloadSambada()). Unless its location
# is in your PATH environment variable, pass it through the directory argument.

#################
# Run prepareGeno
#################
# Example with ped input file, no filtering.
# The csv output goes to the session temporary directory.
prepareGeno(fileName=system.file("extdata", "uganda-subset-mol.ped", package = "R.SamBada"),
     outputFile=file.path(tempdir(), 'uganda-subset-mol.csv'),
     saveGDS=FALSE, interactiveChecks=FALSE)

\donttest{
# Example with gds file and filtering
# Define right GDS file according to your OS
if(Sys.info()['sysname']=='Windows'){
  gdsFile=system.file("extdata", "uganda-subset-mol_windows.gds", package = "R.SamBada")
} else {
  gdsFile=system.file("extdata", "uganda-subset-mol_unix.gds", package = "R.SamBada")
}
# Filter on MAF and missingness; input is already a GDS file
prepareGeno(fileName=gdsFile,
     outputFile=file.path(tempdir(), 'uganda-subset-mol.csv'),
     saveGDS=FALSE, mafThresh=0.05, missingnessThresh=0.1, interactiveChecks=FALSE)

# Run prepareGeno with interactiveChecks=TRUE
# saveGDS=TRUE keeps the GDS file created from the ped input
prepareGeno(fileName=system.file("extdata", "uganda-subset-mol.ped", package = "R.SamBada"),
     outputFile=file.path(tempdir(), 'uganda-subset-mol.csv'),
     saveGDS=TRUE, mafThresh=0.05, missingnessThresh=0.05, interactiveChecks=TRUE)
}
}
\author{
Solange Duruz, Oliver Selmoni
}
