% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/missing_data.R
\name{GenoPop_Impute}
\alias{GenoPop_Impute}
\title{GenoPop-Impute}
\usage{
GenoPop_Impute(
  vcf_path,
  output_vcf,
  batch_size = 1000,
  maxiter = 10,
  ntree = 100,
  threads = 1,
  write_log = FALSE,
  logfile = "log.txt"
)
}
\arguments{
\item{vcf_path}{Path to the input VCF file.}

\item{output_vcf}{Path for the output VCF file with imputed data.}

\item{batch_size}{Number of SNPs to process per batch (default: 1000).}

\item{maxiter}{Number of improvement iterations for the random forest algorithm (default: 10).}

\item{ntree}{Number of decision trees in the random forest (default: 100).}

\item{threads}{Number of threads used for computation (default: 1).}

\item{write_log}{If TRUE, writes a log file of the process (advised for large datasets; default: FALSE).}

\item{logfile}{Path to the log file, used only if \code{write_log} is TRUE (default: \code{"log.txt"}).}
}
\value{
Path to the output VCF file with imputed data.
}
\description{
Performs imputation of missing genomic data in batches using the missForest algorithm (Stekhoven & Bühlmann, 2012). This function reads a VCF file, divides it into batches of a fixed number of SNPs, applies the missForest algorithm to each batch, and writes the results to a new VCF file, which is returned bgzipped and tabix-indexed. The choice of batch size is critical for balancing accuracy and computational demand. We found that a batch size of 500 SNPs is the most accurate for recombination rates typical of mammals. For higher average recombination rates (> 5 cM/Mb), we recommend a batch size of 100 SNPs.
}
\examples{
 \donttest{vcf_file <- system.file("tests/testthat/sim_miss.vcf.gz", package = "GenoPop")
 index_file <- system.file("tests/testthat/sim_miss.vcf.gz.tbi", package = "GenoPop")
 output_file <- tempfile(fileext = ".vcf")
 GenoPop_Impute(vcf_file, output_vcf = output_file, batch_size = 500)}
}
